// NOTE(review): the line breaks of this generated file appear to have been
// collapsed, so many instructions share one physical line. The instruction
// text below is left exactly as found; only comment lines have been added.
//
// encodeBlockAsm -- Go stub (per the comment on the next line):
//   func encodeBlockAsm(dst []byte, src []byte) int
// The TEXT directive declares a 65568-byte frame and 56 bytes of
// arguments/results.
//
// Stack slots, as written by the code below:
//   (SP)      dst_base + (src_len - 5 - (src_len >> 5)). Output pointers are
//             compared against this value; when one is not below it, 0 is
//             stored to ret and the function returns.
//   8(SP)     dst_base as it was on entry.
//   16(SP)    src_len - 8. Search positions beyond it branch to
//             emit_remainder_encodeBlockAsm.
//   20(SP)    0 on entry; the literal emitters copy from src_base + 20(SP)
//             and then store the new position back into this slot.
//   24(SP)    1 on entry; match_nolit_loop stores AX - BX here. The search
//             loop uses it as a backwards distance (src is read at
//             AX - 24(SP) + 1).
//   28(SP)    AX + 4 + ((AX - 20(SP)) >> 6); reloaded into AX when none of
//             the candidates matches.
//   32(SP)... 65536 bytes cleared by zero_loop (0x200 iterations of 0x80
//             bytes each) and afterwards indexed as the lookup table.
//
// In the search loop AX is the current offset into src and CX is src_base.
// dst_base+0(FP) is overwritten with the advancing output pointer, and the
// final RET path stores 8(SP) - dst_base+0(FP) into ret.
// NOTE(review): that difference is (entry dst) - (current dst); confirm the
// sign is what the Go caller expects.
// Code generated by command: go run asm.go -out allocfail.s -stubs stubs.go. DO NOT EDIT. // +build !appengine // +build !noasm // +build gc #include "textflag.h" // func encodeBlockAsm(dst []byte, src []byte) int // Requires: SSE2 TEXT ·encodeBlockAsm(SB), $65568-56 MOVQ $0x00000200, AX LEAQ 32(SP), CX PXOR X0, X0 zero_loop_encodeBlockAsm: MOVOU X0, (CX) MOVOU X0, 16(CX) MOVOU X0, 32(CX) MOVOU X0, 48(CX) MOVOU X0, 64(CX) MOVOU X0, 80(CX) MOVOU X0, 96(CX) MOVOU X0, 112(CX) ADDQ $0x80, CX DECQ AX JNZ zero_loop_encodeBlockAsm MOVL AX, 20(SP) MOVQ src_len+32(FP), AX LEAQ -5(AX), CX LEAQ -8(AX), BX SHRQ $0x05, AX SUBL AX, CX MOVL BX, 16(SP) MOVQ dst_base+0(FP), AX MOVQ AX, 8(SP) LEAQ (AX)(CX*1), CX MOVQ CX, (SP) MOVL $0x00000001, AX MOVL AX, 24(SP) MOVQ src_base+24(FP), CX search_loop_encodeBlockAsm: MOVQ (CX)(AX*1), SI MOVL AX, BX SUBL 20(SP), BX SHRL $0x06, BX LEAQ 4(AX)(BX*1), BX MOVL 16(SP), DI CMPL BX, DI JGT emit_remainder_encodeBlockAsm MOVL BX, 28(SP) MOVQ $0x0000cf1bbcdcbf9b, BX MOVQ SI, R8 MOVQ SI, R9 SHRQ $0x08, R9 SHLQ $0x10, R8 IMULQ BX, R8 SHRQ $0x30, R8 SHLQ $0x10, R9 IMULQ BX, R9 SHRQ $0x30, R9 MOVL 32(SP)(R8*1), BX MOVL 32(SP)(R9*1), DI MOVL AX, 32(SP)(R8*1) LEAL 1(AX), R8 MOVL R8, 32(SP)(R9*1) MOVL AX, R8 SUBL 24(SP), R8 MOVL 1(CX)(R8*1), R10 MOVQ SI, R9 SHLQ $0x08, R9 CMPL R9, R10 JNE no_repeat_found_encodeBlockAsm LEAQ 1(AX), SI MOVL 20(SP), BX TESTL R8, R8 JZ repeat_extend_back_end_encodeBlockAsm repeat_extend_back_loop_encodeBlockAsm: CMPL SI, BX JG repeat_extend_back_end_encodeBlockAsm MOVB -1(CX)(R8*1), DL MOVB -1(CX)(SI*1), DI CMPB DL, DI JNE repeat_extend_back_end_encodeBlockAsm LEAQ -1(SI), SI DECL R8 JZ repeat_extend_back_end_encodeBlockAsm JMP repeat_extend_back_loop_encodeBlockAsm repeat_extend_back_end_encodeBlockAsm: MOVL 20(SP), BX CMPL BX, SI JEQ emit_literal_skip_repeat_emit_encodeBlockAsm MOVL SI, DI MOVL SI, 20(SP) LEAQ (CX)(BX*1), R8 SUBL BX, DI MOVQ dst_base+0(FP), BX MOVQ DI, R9 SUBL $0x01, R9 JC 
emit_literal_done_repeat_emit_encodeBlockAsm CMPL R9, $0x3c JLT one_byte_repeat_emit_encodeBlockAsm CMPL R9, $0x00000100 JLT two_bytes_repeat_emit_encodeBlockAsm CMPL R9, $0x00010000 JLT three_bytes_repeat_emit_encodeBlockAsm CMPL R9, $0x01000000 JLT four_bytes_repeat_emit_encodeBlockAsm MOVB $0xfc, (BX) MOVL R9, 1(BX) ADDQ $0x05, BX JMP memmove_repeat_emit_encodeBlockAsm four_bytes_repeat_emit_encodeBlockAsm: MOVQ R9, R10 SHRL $0x10, R10 MOVB $0xf8, (BX) MOVW R9, 1(BX) MOVB R10, 3(BX) ADDQ $0x04, BX JMP memmove_repeat_emit_encodeBlockAsm three_bytes_repeat_emit_encodeBlockAsm: MOVB $0xf4, (BX) MOVW R9, 1(BX) ADDQ $0x03, BX JMP memmove_repeat_emit_encodeBlockAsm two_bytes_repeat_emit_encodeBlockAsm: MOVB $0xf0, (BX) MOVB R9, 1(BX) ADDQ $0x02, BX JMP memmove_repeat_emit_encodeBlockAsm one_byte_repeat_emit_encodeBlockAsm: SHLB $0x02, R9 MOVB R9, (BX) ADDQ $0x01, BX memmove_repeat_emit_encodeBlockAsm: LEAQ (BX)(DI*1), R9 NOP emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_tail: TESTQ DI, DI JEQ emit_literal_done_repeat_emit_encodeBlockAsm CMPQ DI, $0x02 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_1or2 CMPQ DI, $0x04 JB emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_3 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_4 CMPQ DI, $0x08 JB emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_5through7 JE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8 CMPQ DI, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_9through16 CMPQ DI, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32 CMPQ DI, $0x40 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64 CMPQ DI, $0x80 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_65through128 CMPQ DI, $0x00000100 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_129through256 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_256through2048 
// Repeat-path literal copy, fixed-size cases up to 128 bytes (source R8,
// destination BX, length DI). Cases that cover a size range load the first
// and the last chunk and store both, so the two stores may overlap.
emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_1or2: MOVB (R8), R9 MOVB -1(R8)(DI*1), R8 MOVB R9, (BX) MOVB R8, -1(BX)(DI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsm emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_4: MOVL (R8), R9 MOVL R9, (BX) JMP emit_literal_done_repeat_emit_encodeBlockAsm emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_3: MOVW (R8), R9 MOVB 2(R8), R8 MOVW R9, (BX) MOVB R8, 2(BX) JMP emit_literal_done_repeat_emit_encodeBlockAsm emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_5through7: MOVL (R8), R9 MOVL -4(R8)(DI*1), R8 MOVL R9, (BX) MOVL R8, -4(BX)(DI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsm emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8: MOVQ (R8), R9 MOVQ R9, (BX) JMP emit_literal_done_repeat_emit_encodeBlockAsm emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_9through16: MOVQ (R8), R9 MOVQ -8(R8)(DI*1), R8 MOVQ R9, (BX) MOVQ R8, -8(BX)(DI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsm emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32: MOVOU (R8), X0 MOVOU -16(R8)(DI*1), X1 MOVOU X0, (BX) MOVOU X1, -16(BX)(DI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsm emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64: MOVOU (R8), X0 MOVOU 16(R8), X1 MOVOU -32(R8)(DI*1), X2 MOVOU -16(R8)(DI*1), X3 MOVOU X0, (BX) MOVOU X1, 16(BX) MOVOU X2, -32(BX)(DI*1) MOVOU X3, -16(BX)(DI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsm emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_65through128: MOVOU (R8), X0 MOVOU 16(R8), X1 MOVOU 32(R8), X2 MOVOU 48(R8), X3 MOVOU -64(R8)(DI*1), X12 MOVOU -48(R8)(DI*1), X13 MOVOU -32(R8)(DI*1), X14 MOVOU -16(R8)(DI*1), X15 MOVOU X0, (BX) MOVOU X1, 16(BX) MOVOU X2, 32(BX) MOVOU X3, 48(BX) MOVOU X12, -64(BX)(DI*1) MOVOU X13, -48(BX)(DI*1) MOVOU X14, -32(BX)(DI*1) MOVOU X15, -16(BX)(DI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsm 
// Repeat-path literal copy, continued: the 129..256-byte case, then a loop
// that moves 256 bytes per iteration and jumps back to the size dispatch
// (..._memmove_tail) once fewer than 0x100 bytes remain.
// NOTE(review): `MOVQ R9, BX` directly follows an unconditional JMP and has
// no label, so it looks unreachable; the fixed-size cases jump straight to
// emit_literal_done with BX still pointing at the start of the copied bytes.
// Confirm against the generator.
// NOTE(review): after emit_literal_skip, the BX produced by
// `SUBL 24(SP), BX` is overwritten by the next instruction, and
// matchlen_loopback_repeat_extend loads (CX)(R8*1) and XORs it with the same
// address, which always yields zero -- TODO confirm the intended operands.
emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_129through256: MOVOU (R8), X0 MOVOU 16(R8), X1 MOVOU 32(R8), X2 MOVOU 48(R8), X3 MOVOU 64(R8), X4 MOVOU 80(R8), X5 MOVOU 96(R8), X6 MOVOU 112(R8), X7 MOVOU -128(R8)(DI*1), X8 MOVOU -112(R8)(DI*1), X9 MOVOU -96(R8)(DI*1), X10 MOVOU -80(R8)(DI*1), X11 MOVOU -64(R8)(DI*1), X12 MOVOU -48(R8)(DI*1), X13 MOVOU -32(R8)(DI*1), X14 MOVOU -16(R8)(DI*1), X15 MOVOU X0, (BX) MOVOU X1, 16(BX) MOVOU X2, 32(BX) MOVOU X3, 48(BX) MOVOU X4, 64(BX) MOVOU X5, 80(BX) MOVOU X6, 96(BX) MOVOU X7, 112(BX) MOVOU X8, -128(BX)(DI*1) MOVOU X9, -112(BX)(DI*1) MOVOU X10, -96(BX)(DI*1) MOVOU X11, -80(BX)(DI*1) MOVOU X12, -64(BX)(DI*1) MOVOU X13, -48(BX)(DI*1) MOVOU X14, -32(BX)(DI*1) MOVOU X15, -16(BX)(DI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsm emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_256through2048: LEAQ -256(DI), DI MOVOU (R8), X0 MOVOU 16(R8), X1 MOVOU 32(R8), X2 MOVOU 48(R8), X3 MOVOU 64(R8), X4 MOVOU 80(R8), X5 MOVOU 96(R8), X6 MOVOU 112(R8), X7 MOVOU 128(R8), X8 MOVOU 144(R8), X9 MOVOU 160(R8), X10 MOVOU 176(R8), X11 MOVOU 192(R8), X12 MOVOU 208(R8), X13 MOVOU 224(R8), X14 MOVOU 240(R8), X15 MOVOU X0, (BX) MOVOU X1, 16(BX) MOVOU X2, 32(BX) MOVOU X3, 48(BX) MOVOU X4, 64(BX) MOVOU X5, 80(BX) MOVOU X6, 96(BX) MOVOU X7, 112(BX) MOVOU X8, 128(BX) MOVOU X9, 144(BX) MOVOU X10, 160(BX) MOVOU X11, 176(BX) MOVOU X12, 192(BX) MOVOU X13, 208(BX) MOVOU X14, 224(BX) MOVOU X15, 240(BX) CMPQ DI, $0x00000100 LEAQ 256(R8), R8 LEAQ 256(BX), BX JGE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_256through2048 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_tail MOVQ R9, BX emit_literal_done_repeat_emit_encodeBlockAsm: MOVQ BX, dst_base+0(FP) emit_literal_skip_repeat_emit_encodeBlockAsm: ADDL $0x05, AX MOVL AX, BX SUBL 24(SP), BX MOVL 16(SP), BX SUBL AX, BX XORQ R8, R8 CMPQ BX, $0x08 JL matchlen_single_repeat_extend matchlen_loopback_repeat_extend: MOVQ (CX)(R8*1), DI XORQ (CX)(R8*1), DI TESTQ DI, DI JZ 
matchlen_loop_repeat_extend BSFQ DI, DI SARQ $0x03, DI LEAQ (R8)(DI*1), R8 JMP repeat_extend_forward_end_encodeBlockAsm matchlen_loop_repeat_extend: LEAQ -8(BX), BX LEAQ 8(R8), R8 CMPQ BX, $0x08 JGE matchlen_loopback_repeat_extend matchlen_single_repeat_extend: TESTQ BX, BX JZ repeat_extend_forward_end_encodeBlockAsm matchlen_single_loopback_repeat_extend: MOVB (CX)(R8*1), DI CMPB (CX)(R8*1), DI JNE repeat_extend_forward_end_encodeBlockAsm LEAQ 1(R8), R8 DECQ BX JNZ matchlen_single_loopback_repeat_extend repeat_extend_forward_end_encodeBlockAsm: ADDL R8, AX MOVL AX, BX SUBL SI, BX MOVL 24(SP), SI MOVQ dst_base+0(FP), DI MOVL 20(SP), R8 TESTL R8, R8 JZ repeat_as_copy_encodeBlockAsm emit_repeat_again_match_repeat_: MOVQ BX, R8 LEAQ -4(BX), BX CMPL R8, $0x08 JLE repeat_two_match_repeat_ CMPL R8, $0x0c JGE cant_repeat_two_offset_match_repeat_ CMPL SI, $0x00000800 JLT repeat_two_offset_match_repeat_ cant_repeat_two_offset_match_repeat_: CMPL BX, $0x00000104 JLT repeat_three_match_repeat_ CMPL BX, $0x00010100 JLT repeat_four_match_repeat_ CMPL BX, $0x0100ffff JLT repeat_five_match_repeat_ LEAQ -16842747(BX), BX MOVW $0x001d, (DI) MOVW $0xfffb, 2(DI) MOVB $0xff, 4(DI) ADDQ $0x05, DI JMP emit_repeat_again_match_repeat_ repeat_five_match_repeat_: LEAQ -65536(BX), BX MOVQ BX, SI MOVW $0x001d, (DI) MOVW BX, 2(DI) SARQ $0x10, SI MOVB SI, 4(DI) ADDQ $0x05, DI JMP repeat_end_emit_encodeBlockAsm repeat_four_match_repeat_: LEAQ -256(BX), BX MOVW $0x0019, (DI) MOVW BX, 2(DI) ADDQ $0x04, DI JMP repeat_end_emit_encodeBlockAsm repeat_three_match_repeat_: LEAQ -4(BX), BX MOVW $0x0015, (DI) MOVB BL, 2(DI) ADDQ $0x03, DI JMP repeat_end_emit_encodeBlockAsm repeat_two_match_repeat_: SHLL $0x02, BX ORL $0x01, BX MOVW BX, (DI) ADDQ $0x02, DI JMP repeat_end_emit_encodeBlockAsm repeat_two_offset_match_repeat_: XORQ R8, R8 LEAQ 1(R8)(BX*4), BX MOVB SI, 1(DI) SARL $0x08, SI SHLL $0x05, SI ORL SI, BX MOVB BL, (DI) ADDQ $0x02, DI JMP repeat_end_emit_encodeBlockAsm repeat_as_copy_encodeBlockAsm: 
// Body of repeat_as_copy (its label ends the previous line). On entry SI
// holds the value loaded from 24(SP), BX the length computed at
// repeat_extend_forward_end, and DI the output pointer. SI below 0x10000
// branches to the two_byte_offset encodings; otherwise 5-byte forms (a tag
// byte followed by SI stored with MOVD) are written.
CMPL SI, $0x00010000 JL two_byte_offset_repeat_as_copy_encodeBlockAsm CMPL BX, $0x40 JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm MOVB $0xff, (DI) MOVD SI, 1(DI) LEAQ -64(BX), BX ADDQ $0x05, DI CMPL BX, $0x04 JL four_bytes_remain_repeat_as_copy_encodeBlockAsm emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy: MOVQ BX, R8 LEAQ -4(BX), BX CMPL R8, $0x08 JLE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy CMPL R8, $0x0c JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy CMPL SI, $0x00000800 JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy: CMPL BX, $0x00000104 JLT repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy CMPL BX, $0x00010100 JLT repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy CMPL BX, $0x0100ffff JLT repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy LEAQ -16842747(BX), BX MOVW $0x001d, (DI) MOVW $0xfffb, 2(DI) MOVB $0xff, 4(DI) ADDQ $0x05, DI JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy: LEAQ -65536(BX), BX MOVQ BX, SI MOVW $0x001d, (DI) MOVW BX, 2(DI) SARQ $0x10, SI MOVB SI, 4(DI) ADDQ $0x05, DI JMP repeat_end_emit_encodeBlockAsm repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy: LEAQ -256(BX), BX MOVW $0x0019, (DI) MOVW BX, 2(DI) ADDQ $0x04, DI JMP repeat_end_emit_encodeBlockAsm repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy: LEAQ -4(BX), BX MOVW $0x0015, (DI) MOVB BL, 2(DI) ADDQ $0x03, DI JMP repeat_end_emit_encodeBlockAsm repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy: SHLL $0x02, BX ORL $0x01, BX MOVW BX, (DI) ADDQ $0x02, DI JMP repeat_end_emit_encodeBlockAsm repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy: XORQ R8, R8 LEAQ 1(R8)(BX*4), BX MOVB SI, 1(DI) SARL $0x08, SI SHLL $0x05, SI ORL SI, BX MOVB BL, (DI) ADDQ $0x02, DI JMP repeat_end_emit_encodeBlockAsm four_bytes_remain_repeat_as_copy_encodeBlockAsm: TESTL BX, BX JZ 
repeat_end_emit_encodeBlockAsm MOVB $0x03, DL LEAQ -4(DX)(BX*4), BX MOVB BL, (DI) MOVD SI, 1(DI) ADDQ $0x05, DI JMP repeat_end_emit_encodeBlockAsm two_byte_offset_repeat_as_copy_encodeBlockAsm: CMPL BX, $0x40 JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm MOVB $0xee, (DI) MOVW SI, 1(DI) LEAQ -60(BX), BX ADDQ $0x03, DI emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short: MOVQ BX, R8 LEAQ -4(BX), BX CMPL R8, $0x08 JLE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short CMPL R8, $0x0c JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short CMPL SI, $0x00000800 JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short: CMPL BX, $0x00000104 JLT repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short CMPL BX, $0x00010100 JLT repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short CMPL BX, $0x0100ffff JLT repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short LEAQ -16842747(BX), BX MOVW $0x001d, (DI) MOVW $0xfffb, 2(DI) MOVB $0xff, 4(DI) ADDQ $0x05, DI JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short: LEAQ -65536(BX), BX MOVQ BX, SI MOVW $0x001d, (DI) MOVW BX, 2(DI) SARQ $0x10, SI MOVB SI, 4(DI) ADDQ $0x05, DI JMP repeat_end_emit_encodeBlockAsm repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short: LEAQ -256(BX), BX MOVW $0x0019, (DI) MOVW BX, 2(DI) ADDQ $0x04, DI JMP repeat_end_emit_encodeBlockAsm repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short: LEAQ -4(BX), BX MOVW $0x0015, (DI) MOVB BL, 2(DI) ADDQ $0x03, DI JMP repeat_end_emit_encodeBlockAsm repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short: SHLL $0x02, BX ORL $0x01, BX MOVW BX, (DI) ADDQ $0x02, DI JMP repeat_end_emit_encodeBlockAsm repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short: XORQ R8, R8 LEAQ 1(R8)(BX*4), BX MOVB SI, 1(DI) SARL $0x08, SI SHLL $0x05, SI ORL SI, BX MOVB BL, 
(DI) ADDQ $0x02, DI JMP repeat_end_emit_encodeBlockAsm two_byte_offset_short_repeat_as_copy_encodeBlockAsm: CMPL BX, $0x0c JGE emit_copy_three_repeat_as_copy_encodeBlockAsm CMPL SI, $0x00000800 JGE emit_copy_three_repeat_as_copy_encodeBlockAsm MOVB $0x01, DL LEAQ -16(DX)(BX*4), BX MOVB SI, 1(DI) SHRL $0x08, SI SHLL $0x05, SI ORL SI, BX MOVB BL, (DI) ADDQ $0x02, DI JMP repeat_end_emit_encodeBlockAsm emit_copy_three_repeat_as_copy_encodeBlockAsm: MOVB $0x02, DL LEAQ -4(DX)(BX*4), BX MOVB BL, (DI) MOVW SI, 1(DI) ADDQ $0x03, DI repeat_end_emit_encodeBlockAsm: MOVQ DI, dst_base+0(FP) MOVL 16(SP), BX CMPL AX, BX JGT emit_remainder_encodeBlockAsm JMP search_loop_encodeBlockAsm no_repeat_found_encodeBlockAsm: MOVQ $0x0000cf1bbcdcbf9b, R9 MOVQ SI, R8 SHRQ $0x10, R8 SHLQ $0x10, R8 IMULQ R9, R8 SHRQ $0x30, R8 CMPL (CX)(BX*1), SI SHRQ $0x08, SI JEQ candidate_match_encodeBlockAsm MOVL 32(SP)(R8*1), BX CMPL (CX)(DI*1), SI JEQ candidate2_match_encodeBlockAsm LEAQ 2(AX), DI MOVL DI, 32(SP)(R8*1) SHRQ $0x08, SI CMPL (CX)(BX*1), SI JEQ candidate3_match_encodeBlockAsm MOVL 28(SP), AX JMP search_loop_encodeBlockAsm candidate3_match_encodeBlockAsm: ADDL $0x02, AX JMP candidate_match_encodeBlockAsm candidate2_match_encodeBlockAsm: LEAQ -2(AX), BX MOVL BX, 32(SP)(R8*1) INCL AX MOVL DI, BX candidate_match_encodeBlockAsm: MOVL 20(SP), SI TESTL BX, BX JZ match_extend_back_end_encodeBlockAsm match_extend_back_loop_encodeBlockAsm: CMPL AX, SI JG match_extend_back_end_encodeBlockAsm MOVB -1(CX)(BX*1), DL MOVB -1(CX)(AX*1), DI CMPB DL, DI JNE match_extend_back_end_encodeBlockAsm LEAL -1(AX), AX DECL BX JZ match_extend_back_end_encodeBlockAsm JMP match_extend_back_loop_encodeBlockAsm match_extend_back_end_encodeBlockAsm: MOVL AX, SI SUBL 20(SP), SI LEAQ dst_base+0(FP)(SI*1), SI CMPQ SI, (SP) JL match_dst_size_check_encodeBlockAsm MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeBlockAsm: MOVL BX, SI MOVL 20(SP), DI CMPL DI, SI JEQ emit_literal_skip_match_emit_encodeBlockAsm MOVL SI, 
R8 MOVL SI, 20(SP) LEAQ (CX)(DI*1), SI SUBL DI, R8 MOVQ dst_base+0(FP), DI MOVQ R8, R9 SUBL $0x01, R9 JC emit_literal_done_match_emit_encodeBlockAsm CMPL R9, $0x3c JLT one_byte_match_emit_encodeBlockAsm CMPL R9, $0x00000100 JLT two_bytes_match_emit_encodeBlockAsm CMPL R9, $0x00010000 JLT three_bytes_match_emit_encodeBlockAsm CMPL R9, $0x01000000 JLT four_bytes_match_emit_encodeBlockAsm MOVB $0xfc, (DI) MOVL R9, 1(DI) ADDQ $0x05, DI JMP memmove_match_emit_encodeBlockAsm four_bytes_match_emit_encodeBlockAsm: MOVQ R9, R10 SHRL $0x10, R10 MOVB $0xf8, (DI) MOVW R9, 1(DI) MOVB R10, 3(DI) ADDQ $0x04, DI JMP memmove_match_emit_encodeBlockAsm three_bytes_match_emit_encodeBlockAsm: MOVB $0xf4, (DI) MOVW R9, 1(DI) ADDQ $0x03, DI JMP memmove_match_emit_encodeBlockAsm two_bytes_match_emit_encodeBlockAsm: MOVB $0xf0, (DI) MOVB R9, 1(DI) ADDQ $0x02, DI JMP memmove_match_emit_encodeBlockAsm one_byte_match_emit_encodeBlockAsm: SHLB $0x02, R9 MOVB R9, (DI) ADDQ $0x01, DI memmove_match_emit_encodeBlockAsm: LEAQ (DI)(R8*1), R9 NOP emit_lit_memmove_match_emit_encodeBlockAsm_memmove_tail: TESTQ R8, R8 JEQ emit_literal_done_match_emit_encodeBlockAsm CMPQ R8, $0x02 JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_1or2 CMPQ R8, $0x04 JB emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_3 JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_4 CMPQ R8, $0x08 JB emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_5through7 JE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8 CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_9through16 CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32 CMPQ R8, $0x40 JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64 CMPQ R8, $0x80 JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_65through128 CMPQ R8, $0x00000100 JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_129through256 JMP 
emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_256through2048 emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_1or2: MOVB (SI), R9 MOVB -1(SI)(R8*1), SI MOVB R9, (DI) MOVB SI, -1(DI)(R8*1) JMP emit_literal_done_match_emit_encodeBlockAsm emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_4: MOVL (SI), R9 MOVL R9, (DI) JMP emit_literal_done_match_emit_encodeBlockAsm emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_3: MOVW (SI), R9 MOVB 2(SI), SI MOVW R9, (DI) MOVB SI, 2(DI) JMP emit_literal_done_match_emit_encodeBlockAsm emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_5through7: MOVL (SI), R9 MOVL -4(SI)(R8*1), SI MOVL R9, (DI) MOVL SI, -4(DI)(R8*1) JMP emit_literal_done_match_emit_encodeBlockAsm emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8: MOVQ (SI), R9 MOVQ R9, (DI) JMP emit_literal_done_match_emit_encodeBlockAsm emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_9through16: MOVQ (SI), R9 MOVQ -8(SI)(R8*1), SI MOVQ R9, (DI) MOVQ SI, -8(DI)(R8*1) JMP emit_literal_done_match_emit_encodeBlockAsm emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32: MOVOU (SI), X0 MOVOU -16(SI)(R8*1), X1 MOVOU X0, (DI) MOVOU X1, -16(DI)(R8*1) JMP emit_literal_done_match_emit_encodeBlockAsm emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU -32(SI)(R8*1), X2 MOVOU -16(SI)(R8*1), X3 MOVOU X0, (DI) MOVOU X1, 16(DI) MOVOU X2, -32(DI)(R8*1) MOVOU X3, -16(DI)(R8*1) JMP emit_literal_done_match_emit_encodeBlockAsm emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_65through128: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU 32(SI), X2 MOVOU 48(SI), X3 MOVOU -64(SI)(R8*1), X12 MOVOU -48(SI)(R8*1), X13 MOVOU -32(SI)(R8*1), X14 MOVOU -16(SI)(R8*1), X15 MOVOU X0, (DI) MOVOU X1, 16(DI) MOVOU X2, 32(DI) MOVOU X3, 48(DI) MOVOU X12, -64(DI)(R8*1) MOVOU X13, -48(DI)(R8*1) MOVOU X14, -32(DI)(R8*1) MOVOU X15, -16(DI)(R8*1) JMP emit_literal_done_match_emit_encodeBlockAsm 
// Match-path literal copy, continued (source SI, destination DI, length R8):
// the 129..256-byte case and the 256-bytes-per-iteration loop, followed by
// emit_literal_done (stores DI to dst_base+0(FP)) and match_nolit_loop, which
// stores AX - BX to 24(SP) and advances AX and BX by 4.
// NOTE(review): `MOVQ R9, DI` after the unconditional JMP carries no label
// and looks unreachable, mirroring the repeat path.
emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_129through256: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU 32(SI), X2 MOVOU 48(SI), X3 MOVOU 64(SI), X4 MOVOU 80(SI), X5 MOVOU 96(SI), X6 MOVOU 112(SI), X7 MOVOU -128(SI)(R8*1), X8 MOVOU -112(SI)(R8*1), X9 MOVOU -96(SI)(R8*1), X10 MOVOU -80(SI)(R8*1), X11 MOVOU -64(SI)(R8*1), X12 MOVOU -48(SI)(R8*1), X13 MOVOU -32(SI)(R8*1), X14 MOVOU -16(SI)(R8*1), X15 MOVOU X0, (DI) MOVOU X1, 16(DI) MOVOU X2, 32(DI) MOVOU X3, 48(DI) MOVOU X4, 64(DI) MOVOU X5, 80(DI) MOVOU X6, 96(DI) MOVOU X7, 112(DI) MOVOU X8, -128(DI)(R8*1) MOVOU X9, -112(DI)(R8*1) MOVOU X10, -96(DI)(R8*1) MOVOU X11, -80(DI)(R8*1) MOVOU X12, -64(DI)(R8*1) MOVOU X13, -48(DI)(R8*1) MOVOU X14, -32(DI)(R8*1) MOVOU X15, -16(DI)(R8*1) JMP emit_literal_done_match_emit_encodeBlockAsm emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_256through2048: LEAQ -256(R8), R8 MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU 32(SI), X2 MOVOU 48(SI), X3 MOVOU 64(SI), X4 MOVOU 80(SI), X5 MOVOU 96(SI), X6 MOVOU 112(SI), X7 MOVOU 128(SI), X8 MOVOU 144(SI), X9 MOVOU 160(SI), X10 MOVOU 176(SI), X11 MOVOU 192(SI), X12 MOVOU 208(SI), X13 MOVOU 224(SI), X14 MOVOU 240(SI), X15 MOVOU X0, (DI) MOVOU X1, 16(DI) MOVOU X2, 32(DI) MOVOU X3, 48(DI) MOVOU X4, 64(DI) MOVOU X5, 80(DI) MOVOU X6, 96(DI) MOVOU X7, 112(DI) MOVOU X8, 128(DI) MOVOU X9, 144(DI) MOVOU X10, 160(DI) MOVOU X11, 176(DI) MOVOU X12, 192(DI) MOVOU X13, 208(DI) MOVOU X14, 224(DI) MOVOU X15, 240(DI) CMPQ R8, $0x00000100 LEAQ 256(SI), SI LEAQ 256(DI), DI JGE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_256through2048 JMP emit_lit_memmove_match_emit_encodeBlockAsm_memmove_tail MOVQ R9, DI emit_literal_done_match_emit_encodeBlockAsm: MOVQ DI, dst_base+0(FP) emit_literal_skip_match_emit_encodeBlockAsm: NOP match_nolit_loop_encodeBlockAsm: MOVL AX, SI MOVL AX, SI SUBL BX, SI MOVL SI, 24(SP) ADDL $0x04, AX ADDL $0x04, BX MOVL 16(SP), SI SUBL AX, SI XORQ R8, R8 CMPQ SI, $0x08 JL matchlen_single_match_nolit_encodeBlockAsm 
// Match-length loop for the match path (SI = bytes remaining, R8 = length
// found so far), followed by the copy/repeat encoders that write through DI.
// NOTE(review): MOVQ and XORQ both read (CX)(R8*1), and the byte loop
// compares (CX)(R8*1) with itself, so no mismatch can be detected here --
// TODO confirm the intended operands in the generator.
matchlen_loopback_match_nolit_encodeBlockAsm: MOVQ (CX)(R8*1), DI XORQ (CX)(R8*1), DI TESTQ DI, DI JZ matchlen_loop_match_nolit_encodeBlockAsm BSFQ DI, DI SARQ $0x03, DI LEAQ (R8)(DI*1), R8 JMP match_nolit_end_encodeBlockAsm matchlen_loop_match_nolit_encodeBlockAsm: LEAQ -8(SI), SI LEAQ 8(R8), R8 CMPQ SI, $0x08 JGE matchlen_loopback_match_nolit_encodeBlockAsm matchlen_single_match_nolit_encodeBlockAsm: TESTQ SI, SI JZ match_nolit_end_encodeBlockAsm matchlen_single_loopback_match_nolit_encodeBlockAsm: MOVB (CX)(R8*1), DI CMPB (CX)(R8*1), DI JNE match_nolit_end_encodeBlockAsm LEAQ 1(R8), R8 DECQ SI JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm match_nolit_end_encodeBlockAsm: MOVL 24(SP), SI ADDQ $0x04, R8 MOVQ dst_base+0(FP), DI ADDL R8, AX CMPL SI, $0x00010000 JL two_byte_offset_match_nolit_encodeBlockAsm CMPL R8, $0x40 JLE four_bytes_remain_match_nolit_encodeBlockAsm MOVB $0xff, (DI) MOVD SI, 1(DI) LEAQ -64(R8), R8 ADDQ $0x05, DI CMPL R8, $0x04 JL four_bytes_remain_match_nolit_encodeBlockAsm emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy: MOVQ R8, R9 LEAQ -4(R8), R8 CMPL R9, $0x08 JLE repeat_two_match_nolit_encodeBlockAsm_emit_copy CMPL R9, $0x0c JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy CMPL SI, $0x00000800 JLT repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy: CMPL R8, $0x00000104 JLT repeat_three_match_nolit_encodeBlockAsm_emit_copy CMPL R8, $0x00010100 JLT repeat_four_match_nolit_encodeBlockAsm_emit_copy CMPL R8, $0x0100ffff JLT repeat_five_match_nolit_encodeBlockAsm_emit_copy LEAQ -16842747(R8), R8 MOVW $0x001d, (DI) MOVW $0xfffb, 2(DI) MOVB $0xff, 4(DI) ADDQ $0x05, DI JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy repeat_five_match_nolit_encodeBlockAsm_emit_copy: LEAQ -65536(R8), R8 MOVQ R8, SI MOVW $0x001d, (DI) MOVW R8, 2(DI) SARQ $0x10, SI MOVB SI, 4(DI) ADDQ $0x05, DI JMP match_nolit_emitcopy_end_encodeBlockAsm 
repeat_four_match_nolit_encodeBlockAsm_emit_copy: LEAQ -256(R8), R8 MOVW $0x0019, (DI) MOVW R8, 2(DI) ADDQ $0x04, DI JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_three_match_nolit_encodeBlockAsm_emit_copy: LEAQ -4(R8), R8 MOVW $0x0015, (DI) MOVB R8, 2(DI) ADDQ $0x03, DI JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_two_match_nolit_encodeBlockAsm_emit_copy: SHLL $0x02, R8 ORL $0x01, R8 MOVW R8, (DI) ADDQ $0x02, DI JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy: XORQ R9, R9 LEAQ 1(R9)(R8*4), R8 MOVB SI, 1(DI) SARL $0x08, SI SHLL $0x05, SI ORL SI, R8 MOVB R8, (DI) ADDQ $0x02, DI JMP match_nolit_emitcopy_end_encodeBlockAsm four_bytes_remain_match_nolit_encodeBlockAsm: TESTL R8, R8 JZ match_nolit_emitcopy_end_encodeBlockAsm MOVB $0x03, DL LEAQ -4(DX)(R8*4), R8 MOVB R8, (DI) MOVD SI, 1(DI) ADDQ $0x05, DI JMP match_nolit_emitcopy_end_encodeBlockAsm two_byte_offset_match_nolit_encodeBlockAsm: CMPL R8, $0x40 JLE two_byte_offset_short_match_nolit_encodeBlockAsm MOVB $0xee, (DI) MOVW SI, 1(DI) LEAQ -60(R8), R8 ADDQ $0x03, DI emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short: MOVQ R8, R9 LEAQ -4(R8), R8 CMPL R9, $0x08 JLE repeat_two_match_nolit_encodeBlockAsm_emit_copy_short CMPL R9, $0x0c JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short CMPL SI, $0x00000800 JLT repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short: CMPL R8, $0x00000104 JLT repeat_three_match_nolit_encodeBlockAsm_emit_copy_short CMPL R8, $0x00010100 JLT repeat_four_match_nolit_encodeBlockAsm_emit_copy_short CMPL R8, $0x0100ffff JLT repeat_five_match_nolit_encodeBlockAsm_emit_copy_short LEAQ -16842747(R8), R8 MOVW $0x001d, (DI) MOVW $0xfffb, 2(DI) MOVB $0xff, 4(DI) ADDQ $0x05, DI JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short repeat_five_match_nolit_encodeBlockAsm_emit_copy_short: LEAQ -65536(R8), R8 MOVQ R8, SI MOVW $0x001d, 
(DI) MOVW R8, 2(DI) SARQ $0x10, SI MOVB SI, 4(DI) ADDQ $0x05, DI JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_four_match_nolit_encodeBlockAsm_emit_copy_short: LEAQ -256(R8), R8 MOVW $0x0019, (DI) MOVW R8, 2(DI) ADDQ $0x04, DI JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_three_match_nolit_encodeBlockAsm_emit_copy_short: LEAQ -4(R8), R8 MOVW $0x0015, (DI) MOVB R8, 2(DI) ADDQ $0x03, DI JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_two_match_nolit_encodeBlockAsm_emit_copy_short: SHLL $0x02, R8 ORL $0x01, R8 MOVW R8, (DI) ADDQ $0x02, DI JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short: XORQ R9, R9 LEAQ 1(R9)(R8*4), R8 MOVB SI, 1(DI) SARL $0x08, SI SHLL $0x05, SI ORL SI, R8 MOVB R8, (DI) ADDQ $0x02, DI JMP match_nolit_emitcopy_end_encodeBlockAsm two_byte_offset_short_match_nolit_encodeBlockAsm: CMPL R8, $0x0c JGE emit_copy_three_match_nolit_encodeBlockAsm CMPL SI, $0x00000800 JGE emit_copy_three_match_nolit_encodeBlockAsm MOVB $0x01, DL LEAQ -16(DX)(R8*4), R8 MOVB SI, 1(DI) SHRL $0x08, SI SHLL $0x05, SI ORL SI, R8 MOVB R8, (DI) ADDQ $0x02, DI JMP match_nolit_emitcopy_end_encodeBlockAsm emit_copy_three_match_nolit_encodeBlockAsm: MOVB $0x02, DL LEAQ -4(DX)(R8*4), R8 MOVB R8, (DI) MOVW SI, 1(DI) ADDQ $0x03, DI match_nolit_emitcopy_end_encodeBlockAsm: MOVQ DI, dst_base+0(FP) MOVL AX, 20(SP) CMPL AX, 16(SP) JGE emit_remainder_encodeBlockAsm CMPQ DI, (SP) JL match_nolit_dst_ok_encodeBlockAsm MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeBlockAsm: MOVQ -2(CX)(AX*1), SI MOVQ $0x0000cf1bbcdcbf9b, DI MOVQ SI, R8 SHRQ $0x10, SI MOVQ SI, R9 SHLQ $0x10, R8 IMULQ DI, R8 SHRQ $0x30, R8 SHLQ $0x10, R9 IMULQ DI, R9 SHRQ $0x30, R9 MOVL 32(SP)(R8*1), DI MOVL 32(SP)(R9*1), DI LEAQ -2(AX), DI MOVL DI, 32(SP)(R8*1) MOVL AX, 32(SP)(R9*1) CMPL (CX)(R9*1), SI JEQ match_nolit_loop_encodeBlockAsm INCL AX JMP search_loop_encodeBlockAsm emit_remainder_encodeBlockAsm: MOVQ src_len+32(FP), AX SUBL 20(SP), AX MOVQ 
dst_base+0(FP), DX LEAQ (DX)(AX*1), DX CMPQ DX, (SP) JL emit_remainder_ok_encodeBlockAsm MOVQ $0x00000000, ret+48(FP) RET emit_remainder_ok_encodeBlockAsm: MOVQ src_len+32(FP), AX MOVL 20(SP), DX CMPL DX, AX JEQ emit_literal_skip_emit_remainder_encodeBlockAsm MOVL AX, BX MOVL AX, 20(SP) LEAQ (CX)(DX*1), AX SUBL DX, BX MOVQ dst_base+0(FP), CX MOVQ BX, DX SUBL $0x01, DX JC emit_literal_done_emit_remainder_encodeBlockAsm CMPL DX, $0x3c JLT one_byte_emit_remainder_encodeBlockAsm CMPL DX, $0x00000100 JLT two_bytes_emit_remainder_encodeBlockAsm CMPL DX, $0x00010000 JLT three_bytes_emit_remainder_encodeBlockAsm CMPL DX, $0x01000000 JLT four_bytes_emit_remainder_encodeBlockAsm MOVB $0xfc, (CX) MOVL DX, 1(CX) ADDQ $0x05, CX JMP memmove_emit_remainder_encodeBlockAsm four_bytes_emit_remainder_encodeBlockAsm: MOVQ DX, SI SHRL $0x10, SI MOVB $0xf8, (CX) MOVW DX, 1(CX) MOVB SI, 3(CX) ADDQ $0x04, CX JMP memmove_emit_remainder_encodeBlockAsm three_bytes_emit_remainder_encodeBlockAsm: MOVB $0xf4, (CX) MOVW DX, 1(CX) ADDQ $0x03, CX JMP memmove_emit_remainder_encodeBlockAsm two_bytes_emit_remainder_encodeBlockAsm: MOVB $0xf0, (CX) MOVB DL, 1(CX) ADDQ $0x02, CX JMP memmove_emit_remainder_encodeBlockAsm one_byte_emit_remainder_encodeBlockAsm: SHLB $0x02, DL MOVB DL, (CX) ADDQ $0x01, CX memmove_emit_remainder_encodeBlockAsm: LEAQ (CX)(BX*1), DX NOP emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_tail: TESTQ BX, BX JEQ emit_literal_done_emit_remainder_encodeBlockAsm CMPQ BX, $0x02 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_1or2 CMPQ BX, $0x04 JB emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_3 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_4 CMPQ BX, $0x08 JB emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_5through7 JE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8 CMPQ BX, $0x10 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_9through16 CMPQ BX, $0x20 JBE 
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_17through32 CMPQ BX, $0x40 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64 CMPQ BX, $0x80 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_65through128 CMPQ BX, $0x00000100 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_129through256 JMP emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_256through2048 emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_1or2: MOVB (AX), DL MOVB -1(AX)(BX*1), AL MOVB DL, (CX) MOVB AL, -1(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_4: MOVL (AX), DX MOVL DX, (CX) JMP emit_literal_done_emit_remainder_encodeBlockAsm emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_3: MOVW (AX), DX MOVB 2(AX), AL MOVW DX, (CX) MOVB AL, 2(CX) JMP emit_literal_done_emit_remainder_encodeBlockAsm emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_5through7: MOVL (AX), DX MOVL -4(AX)(BX*1), AX MOVL DX, (CX) MOVL AX, -4(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8: MOVQ (AX), DX MOVQ DX, (CX) JMP emit_literal_done_emit_remainder_encodeBlockAsm emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_9through16: MOVQ (AX), DX MOVQ -8(AX)(BX*1), AX MOVQ DX, (CX) MOVQ AX, -8(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_17through32: MOVOU (AX), X0 MOVOU -16(AX)(BX*1), X1 MOVOU X0, (CX) MOVOU X1, -16(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64: MOVOU (AX), X0 MOVOU 16(AX), X1 MOVOU -32(AX)(BX*1), X2 MOVOU -16(AX)(BX*1), X3 MOVOU X0, (CX) MOVOU X1, 16(CX) MOVOU X2, -32(CX)(BX*1) MOVOU X3, -16(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm 
// Remainder literal copy, continued (source AX, destination CX, length BX):
// the 65..128 and 129..256-byte cases and the 256-bytes-per-iteration loop.
// The function's final RET path follows on the next physical line.
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_65through128: MOVOU (AX), X0 MOVOU 16(AX), X1 MOVOU 32(AX), X2 MOVOU 48(AX), X3 MOVOU -64(AX)(BX*1), X12 MOVOU -48(AX)(BX*1), X13 MOVOU -32(AX)(BX*1), X14 MOVOU -16(AX)(BX*1), X15 MOVOU X0, (CX) MOVOU X1, 16(CX) MOVOU X2, 32(CX) MOVOU X3, 48(CX) MOVOU X12, -64(CX)(BX*1) MOVOU X13, -48(CX)(BX*1) MOVOU X14, -32(CX)(BX*1) MOVOU X15, -16(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_129through256: MOVOU (AX), X0 MOVOU 16(AX), X1 MOVOU 32(AX), X2 MOVOU 48(AX), X3 MOVOU 64(AX), X4 MOVOU 80(AX), X5 MOVOU 96(AX), X6 MOVOU 112(AX), X7 MOVOU -128(AX)(BX*1), X8 MOVOU -112(AX)(BX*1), X9 MOVOU -96(AX)(BX*1), X10 MOVOU -80(AX)(BX*1), X11 MOVOU -64(AX)(BX*1), X12 MOVOU -48(AX)(BX*1), X13 MOVOU -32(AX)(BX*1), X14 MOVOU -16(AX)(BX*1), X15 MOVOU X0, (CX) MOVOU X1, 16(CX) MOVOU X2, 32(CX) MOVOU X3, 48(CX) MOVOU X4, 64(CX) MOVOU X5, 80(CX) MOVOU X6, 96(CX) MOVOU X7, 112(CX) MOVOU X8, -128(CX)(BX*1) MOVOU X9, -112(CX)(BX*1) MOVOU X10, -96(CX)(BX*1) MOVOU X11, -80(CX)(BX*1) MOVOU X12, -64(CX)(BX*1) MOVOU X13, -48(CX)(BX*1) MOVOU X14, -32(CX)(BX*1) MOVOU X15, -16(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_256through2048: LEAQ -256(BX), BX MOVOU (AX), X0 MOVOU 16(AX), X1 MOVOU 32(AX), X2 MOVOU 48(AX), X3 MOVOU 64(AX), X4 MOVOU 80(AX), X5 MOVOU 96(AX), X6 MOVOU 112(AX), X7 MOVOU 128(AX), X8 MOVOU 144(AX), X9 MOVOU 160(AX), X10 MOVOU 176(AX), X11 MOVOU 192(AX), X12 MOVOU 208(AX), X13 MOVOU 224(AX), X14 MOVOU 240(AX), X15 MOVOU X0, (CX) MOVOU X1, 16(CX) MOVOU X2, 32(CX) MOVOU X3, 48(CX) MOVOU X4, 64(CX) MOVOU X5, 80(CX) MOVOU X6, 96(CX) MOVOU X7, 112(CX) MOVOU X8, 128(CX) MOVOU X9, 144(CX) MOVOU X10, 160(CX) MOVOU X11, 176(CX) MOVOU X12, 192(CX) MOVOU X13, 208(CX) MOVOU X14, 224(CX) MOVOU X15, 240(CX) CMPQ BX, $0x00000100 LEAQ 256(AX), AX LEAQ 256(CX), CX JGE 
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_256through2048 JMP emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_tail MOVQ DX, CX emit_literal_done_emit_remainder_encodeBlockAsm: MOVQ CX, dst_base+0(FP) emit_literal_skip_emit_remainder_encodeBlockAsm: MOVQ 8(SP), AX SUBQ dst_base+0(FP), AX MOVQ AX, ret+48(FP) RET // func encodeBlockAsm14B(dst []byte, src []byte) int // Requires: SSE2 TEXT ·encodeBlockAsm14B(SB), $16416-56 MOVQ $0x00000080, AX LEAQ 32(SP), CX PXOR X0, X0 zero_loop_encodeBlockAsm14B: MOVOU X0, (CX) MOVOU X0, 16(CX) MOVOU X0, 32(CX) MOVOU X0, 48(CX) MOVOU X0, 64(CX) MOVOU X0, 80(CX) MOVOU X0, 96(CX) MOVOU X0, 112(CX) ADDQ $0x80, CX DECQ AX JNZ zero_loop_encodeBlockAsm14B MOVL AX, 20(SP) MOVQ src_len+32(FP), AX LEAQ -5(AX), CX LEAQ -8(AX), BX SHRQ $0x05, AX SUBL AX, CX MOVL BX, 16(SP) MOVQ dst_base+0(FP), AX MOVQ AX, 8(SP) LEAQ (AX)(CX*1), CX MOVQ CX, (SP) MOVL $0x00000001, AX MOVL AX, 24(SP) MOVQ src_base+24(FP), CX search_loop_encodeBlockAsm14B: MOVQ (CX)(AX*1), SI MOVL AX, BX SUBL 20(SP), BX SHRL $0x05, BX LEAQ 4(AX)(BX*1), BX MOVL 16(SP), DI CMPL BX, DI JGT emit_remainder_encodeBlockAsm14B MOVL BX, 28(SP) MOVQ $0x0000cf1bbcdcbf9b, BX MOVQ SI, R8 MOVQ SI, R9 SHRQ $0x08, R9 SHLQ $0x10, R8 IMULQ BX, R8 SHRQ $0x32, R8 SHLQ $0x10, R9 IMULQ BX, R9 SHRQ $0x32, R9 MOVL 32(SP)(R8*1), BX MOVL 32(SP)(R9*1), DI MOVL AX, 32(SP)(R8*1) LEAL 1(AX), R8 MOVL R8, 32(SP)(R9*1) MOVL AX, R8 SUBL 24(SP), R8 MOVL 1(CX)(R8*1), R10 MOVQ SI, R9 SHLQ $0x08, R9 CMPL R9, R10 JNE no_repeat_found_encodeBlockAsm14B LEAQ 1(AX), SI MOVL 20(SP), BX TESTL R8, R8 JZ repeat_extend_back_end_encodeBlockAsm14B repeat_extend_back_loop_encodeBlockAsm14B: CMPL SI, BX JG repeat_extend_back_end_encodeBlockAsm14B MOVB -1(CX)(R8*1), DL MOVB -1(CX)(SI*1), DI CMPB DL, DI JNE repeat_extend_back_end_encodeBlockAsm14B LEAQ -1(SI), SI DECL R8 JZ repeat_extend_back_end_encodeBlockAsm14B JMP repeat_extend_back_loop_encodeBlockAsm14B repeat_extend_back_end_encodeBlockAsm14B: MOVL 
20(SP), BX
	// The operands above complete the MOVL that begins at the end of the
	// previous source line, giving BX = 20(SP).
	//
	// Literal emission on the repeat path of encodeBlockAsm14B.
	// On entry SI is the position left by the backward-extension loop and CX
	// is src_base. When 20(SP) already equals SI the whole step is skipped.
	CMPL BX, SI
	JEQ emit_literal_skip_repeat_emit_encodeBlockAsm14B
	// DI = SI - old 20(SP) is the byte count, R8 = CX + old 20(SP) is the
	// first source byte, and 20(SP) is advanced to SI.
	MOVL SI, DI
	MOVL SI, 20(SP)
	LEAQ (CX)(BX*1), R8
	SUBL BX, DI
	// BX becomes the output cursor. It is read from, and later written back
	// to, the dst_base argument slot.
	MOVQ dst_base+0(FP), BX
	// R9 = count - 1. The subtraction borrows only when the count was zero.
	MOVQ DI, R9
	SUBL $0x01, R9
	JC emit_literal_done_repeat_emit_encodeBlockAsm14B
	// Pick a header size from count-1: below 60 one byte, below 256 two,
	// below 65536 three, below 16777216 four, otherwise five.
	CMPL R9, $0x3c
	JLT one_byte_repeat_emit_encodeBlockAsm14B
	CMPL R9, $0x00000100
	JLT two_bytes_repeat_emit_encodeBlockAsm14B
	CMPL R9, $0x00010000
	JLT three_bytes_repeat_emit_encodeBlockAsm14B
	CMPL R9, $0x01000000
	JLT four_bytes_repeat_emit_encodeBlockAsm14B
	// Five-byte header: tag 0xfc followed by count-1 as 32 bits.
	MOVB $0xfc, (BX)
	MOVL R9, 1(BX)
	ADDQ $0x05, BX
	JMP memmove_repeat_emit_encodeBlockAsm14B
four_bytes_repeat_emit_encodeBlockAsm14B:
	// Four-byte header: tag 0xf8, low 16 bits, then bits 16-23 of count-1.
	MOVQ R9, R10
	SHRL $0x10, R10
	MOVB $0xf8, (BX)
	MOVW R9, 1(BX)
	MOVB R10, 3(BX)
	ADDQ $0x04, BX
	JMP memmove_repeat_emit_encodeBlockAsm14B
three_bytes_repeat_emit_encodeBlockAsm14B:
	// Three-byte header: tag 0xf4 followed by count-1 as 16 bits.
	MOVB $0xf4, (BX)
	MOVW R9, 1(BX)
	ADDQ $0x03, BX
	JMP memmove_repeat_emit_encodeBlockAsm14B
two_bytes_repeat_emit_encodeBlockAsm14B:
	// Two-byte header: tag 0xf0 followed by count-1 as one byte.
	MOVB $0xf0, (BX)
	MOVB R9, 1(BX)
	ADDQ $0x02, BX
	JMP memmove_repeat_emit_encodeBlockAsm14B
one_byte_repeat_emit_encodeBlockAsm14B:
	// One-byte header: (count-1) shifted left by two.
	SHLB $0x02, R9
	MOVB R9, (BX)
	ADDQ $0x01, BX
memmove_repeat_emit_encodeBlockAsm14B:
	// R9 = BX + DI, the address just past the bytes about to be copied.
	LEAQ (BX)(DI*1), R9
	NOP
emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_tail:
	// Dispatch on the remaining count in DI. Source is R8, destination is BX.
	TESTQ DI, DI
	JEQ emit_literal_done_repeat_emit_encodeBlockAsm14B
	CMPQ DI, $0x02
	JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_1or2
	CMPQ DI, $0x04
	JB emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_3
	JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_4
	CMPQ DI, $0x08
	JB emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_5through7
	JE emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_8
	CMPQ DI, $0x10
	JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_9through16
	CMPQ DI, $0x20
	JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_17through32
	CMPQ DI, $0x40
	JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_33through64
	CMPQ DI, $0x80
	JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_65through128
	CMPQ DI, $0x00000100
	JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_129through256
	JMP emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_256through2048
	// Size-specialised copies. Each case loads everything first (a chunk from
	// the start and a chunk ending at the last byte, which may overlap),
	// then stores, then jumps to emit_literal_done.
emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_1or2:
	MOVB (R8), R9
	MOVB -1(R8)(DI*1), R8
	MOVB R9, (BX)
	MOVB R8, -1(BX)(DI*1)
	JMP emit_literal_done_repeat_emit_encodeBlockAsm14B
emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_4:
	MOVL (R8), R9
	MOVL R9, (BX)
	JMP emit_literal_done_repeat_emit_encodeBlockAsm14B
emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_3:
	MOVW (R8), R9
	MOVB 2(R8), R8
	MOVW R9, (BX)
	MOVB R8, 2(BX)
	JMP emit_literal_done_repeat_emit_encodeBlockAsm14B
emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_5through7:
	MOVL (R8), R9
	MOVL -4(R8)(DI*1), R8
	MOVL R9, (BX)
	MOVL R8, -4(BX)(DI*1)
	JMP emit_literal_done_repeat_emit_encodeBlockAsm14B
emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_8:
	MOVQ (R8), R9
	MOVQ R9, (BX)
	JMP emit_literal_done_repeat_emit_encodeBlockAsm14B
emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_9through16:
	MOVQ (R8), R9
	MOVQ -8(R8)(DI*1), R8
	MOVQ R9, (BX)
	MOVQ R8, -8(BX)(DI*1)
	JMP emit_literal_done_repeat_emit_encodeBlockAsm14B
emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_17through32:
	MOVOU (R8), X0
	MOVOU -16(R8)(DI*1), X1
	MOVOU X0, (BX)
	MOVOU X1, -16(BX)(DI*1)
	JMP emit_literal_done_repeat_emit_encodeBlockAsm14B
emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_33through64:
	MOVOU (R8), X0
	MOVOU 16(R8), X1
	MOVOU -32(R8)(DI*1), X2
	MOVOU -16(R8)(DI*1), X3
	MOVOU X0, (BX)
	MOVOU X1, 16(BX)
	MOVOU X2, -32(BX)(DI*1)
	MOVOU X3, -16(BX)(DI*1)
	JMP emit_literal_done_repeat_emit_encodeBlockAsm14B
emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_65through128:
	MOVOU (R8), X0
	MOVOU 16(R8), X1
	MOVOU 32(R8), X2
	MOVOU 48(R8), X3
	MOVOU -64(R8)(DI*1), X12
	MOVOU -48(R8)(DI*1), X13
	MOVOU -32(R8)(DI*1), X14
	MOVOU -16(R8)(DI*1), X15
	MOVOU X0, (BX)
	MOVOU X1, 16(BX)
	MOVOU X2, 32(BX)
	MOVOU X3, 48(BX)
	MOVOU X12, -64(BX)(DI*1)
	MOVOU X13, -48(BX)(DI*1)
	MOVOU X14, -32(BX)(DI*1)
	MOVOU X15, -16(BX)(DI*1)
	JMP emit_literal_done_repeat_emit_encodeBlockAsm14B
emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_129through256:
	MOVOU (R8), X0
	MOVOU 16(R8), X1
	MOVOU 32(R8), X2
	MOVOU 48(R8), X3
	MOVOU 64(R8), X4
	MOVOU 80(R8), X5
	MOVOU 96(R8), X6
	MOVOU 112(R8), X7
	MOVOU -128(R8)(DI*1), X8
	MOVOU -112(R8)(DI*1), X9
	MOVOU -96(R8)(DI*1), X10
	MOVOU -80(R8)(DI*1), X11
	MOVOU -64(R8)(DI*1), X12
	MOVOU -48(R8)(DI*1), X13
	MOVOU -32(R8)(DI*1), X14
	MOVOU -16(R8)(DI*1), X15
	MOVOU X0, (BX)
	MOVOU X1, 16(BX)
	MOVOU X2, 32(BX)
	MOVOU X3, 48(BX)
	MOVOU X4, 64(BX)
	MOVOU X5, 80(BX)
	MOVOU X6, 96(BX)
	MOVOU X7, 112(BX)
	MOVOU X8, -128(BX)(DI*1)
	MOVOU X9, -112(BX)(DI*1)
	MOVOU X10, -96(BX)(DI*1)
	MOVOU X11, -80(BX)(DI*1)
	MOVOU X12, -64(BX)(DI*1)
	MOVOU X13, -48(BX)(DI*1)
	MOVOU X14, -32(BX)(DI*1)
	MOVOU X15, -16(BX)(DI*1)
	JMP emit_literal_done_repeat_emit_encodeBlockAsm14B
emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_256through2048:
	// Bulk loop: copy 256 bytes per pass through X0-X15 and reduce DI by 256.
	LEAQ -256(DI), DI
	MOVOU (R8), X0
	MOVOU 16(R8), X1
	MOVOU 32(R8), X2
	MOVOU 48(R8), X3
	MOVOU 64(R8), X4
	MOVOU 80(R8), X5
	MOVOU 96(R8), X6
	MOVOU 112(R8), X7
	MOVOU 128(R8), X8
	MOVOU 144(R8), X9
	MOVOU 160(R8), X10
	MOVOU 176(R8), X11
	MOVOU 192(R8), X12
	MOVOU 208(R8), X13
	MOVOU 224(R8), X14
	MOVOU 240(R8), X15
	MOVOU X0, (BX)
	MOVOU X1, 16(BX)
	MOVOU X2, 32(BX)
	MOVOU X3, 48(BX)
	MOVOU X4, 64(BX)
	MOVOU X5, 80(BX)
	MOVOU X6, 96(BX)
	MOVOU X7, 112(BX)
	MOVOU X8, 128(BX)
	MOVOU X9, 144(BX)
	MOVOU X10, 160(BX)
	MOVOU X11, 176(BX)
	MOVOU X12, 192(BX)
	MOVOU X13, 208(BX)
	MOVOU X14, 224(BX)
	MOVOU X15, 240(BX)
	// The two LEAQs advance both pointers without touching the flags set by
	// CMPQ, so JGE still tests the remaining count against 256.
	CMPQ DI, $0x00000100
	LEAQ 256(R8), R8
	LEAQ 256(BX), BX
	JGE emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_256through2048
	// Fewer than 256 bytes remain: re-enter the size dispatch.
	JMP emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_tail
	// NOTE(review): the next instruction follows an unconditional JMP and has
	// no label, so nothing reaches it as written. Every copy case above jumps
	// straight to emit_literal_done, which means BX is stored without being
	// set to R9. This file is generated; confirm against the generator.
	MOVQ R9, BX
emit_literal_done_repeat_emit_encodeBlockAsm14B:
	// Write the output cursor back to the dst_base argument slot.
	MOVQ BX, dst_base+0(FP)
emit_literal_skip_repeat_emit_encodeBlockAsm14B:
	// Repeat path of encodeBlockAsm14B after any pending literals: measure
	// the match forward, encode it, then return to the search loop.
	// AX is the current position, CX is src_base, SI is the repeat start.
	ADDL $0x05, AX
	// NOTE(review): BX = AX - 24(SP) is overwritten by the very next
	// instruction, so that result is never used.
	MOVL AX, BX
	SUBL 24(SP), BX
	// BX = 16(SP) - AX bounds the comparison loops below. R8 counts matched
	// bytes from zero.
	MOVL 16(SP), BX
	SUBL AX, BX
	XORQ R8, R8
	CMPQ BX, $0x08
	JL matchlen_single_repeat_extend
matchlen_loopback_repeat_extend:
	// Eight bytes at a time.
	// NOTE(review): the load and the XOR both address (CX)(R8*1), so DI is
	// always zero here and the BSFQ branch cannot be taken. Confirm against
	// the generator.
	MOVQ (CX)(R8*1), DI
	XORQ (CX)(R8*1), DI
	TESTQ DI, DI
	JZ matchlen_loop_repeat_extend
	// Lowest set bit index divided by eight gives a byte count to add to R8.
	BSFQ DI, DI
	SARQ $0x03, DI
	LEAQ (R8)(DI*1), R8
	JMP repeat_extend_forward_end_encodeBlockAsm14B
matchlen_loop_repeat_extend:
	LEAQ -8(BX), BX
	LEAQ 8(R8), R8
	CMPQ BX, $0x08
	JGE matchlen_loopback_repeat_extend
matchlen_single_repeat_extend:
	// Byte-at-a-time tail for the last BX (fewer than eight) bytes.
	TESTQ BX, BX
	JZ repeat_extend_forward_end_encodeBlockAsm14B
matchlen_single_loopback_repeat_extend:
	// NOTE(review): the load and the compare use the same address as well.
	MOVB (CX)(R8*1), DI
	CMPB (CX)(R8*1), DI
	JNE repeat_extend_forward_end_encodeBlockAsm14B
	LEAQ 1(R8), R8
	DECQ BX
	JNZ matchlen_single_loopback_repeat_extend
repeat_extend_forward_end_encodeBlockAsm14B:
	// Advance AX by the measured length. BX = AX - SI is the length to
	// encode, SI is reloaded from 24(SP), DI is the output cursor.
	ADDL R8, AX
	MOVL AX, BX
	SUBL SI, BX
	MOVL 24(SP), SI
	MOVQ dst_base+0(FP), DI
	// When 20(SP) is zero take the copy encoding, otherwise fall through to
	// the repeat encoding.
	MOVL 20(SP), R8
	TESTL R8, R8
	JZ repeat_as_copy_encodeBlockAsm14B
emit_repeat_again_match_repeat_:
	// Repeat encoder. R8 keeps the full length and BX becomes length - 4.
	// Lengths up to 8 use the two-byte form. Lengths below 12 with SI below
	// 2048 use the two-byte-with-offset form. Everything else is sized below.
	MOVQ BX, R8
	LEAQ -4(BX), BX
	CMPL R8, $0x08
	JLE repeat_two_match_repeat_
	CMPL R8, $0x0c
	JGE cant_repeat_two_offset_match_repeat_
	CMPL SI, $0x00000800
	JLT repeat_two_offset_match_repeat_
cant_repeat_two_offset_match_repeat_:
	CMPL BX, $0x00000104
	JLT repeat_three_match_repeat_
	CMPL BX, $0x00010100
	JLT repeat_four_match_repeat_
	CMPL BX, $0x0100ffff
	JLT repeat_five_match_repeat_
	// Too long for one five-byte form: write a fixed five-byte chunk
	// (0x001d, 0xfffb, 0xff), subtract 16842747 from BX and loop.
	LEAQ -16842747(BX), BX
	MOVW $0x001d, (DI)
	MOVW $0xfffb, 2(DI)
	MOVB $0xff, 4(DI)
	ADDQ $0x05, DI
	JMP emit_repeat_again_match_repeat_
repeat_five_match_repeat_:
	// Five bytes: 0x001d, then BX - 65536 as 16 bits plus bits 16-23.
	LEAQ -65536(BX), BX
	MOVQ BX, SI
	MOVW $0x001d, (DI)
	MOVW BX, 2(DI)
	SARQ $0x10, SI
	MOVB SI, 4(DI)
	ADDQ $0x05, DI
	JMP repeat_end_emit_encodeBlockAsm14B
repeat_four_match_repeat_:
	// Four bytes: 0x0019, then BX - 256 as 16 bits.
	LEAQ -256(BX), BX
	MOVW $0x0019, (DI)
	MOVW BX, 2(DI)
	ADDQ $0x04, DI
	JMP repeat_end_emit_encodeBlockAsm14B
repeat_three_match_repeat_:
	// Three bytes: 0x0015, then BX - 4 as one byte.
	LEAQ -4(BX), BX
	MOVW $0x0015, (DI)
	MOVB BL, 2(DI)
	ADDQ $0x03, DI
	JMP repeat_end_emit_encodeBlockAsm14B
repeat_two_match_repeat_:
	// Two bytes: (BX << 2) | 1 stored as 16 bits.
	SHLL $0x02, BX
	ORL $0x01, BX
	MOVW BX, (DI)
	ADDQ $0x02, DI
	JMP repeat_end_emit_encodeBlockAsm14B
repeat_two_offset_match_repeat_:
	// Two bytes: low byte of SI at offset 1, and at offset 0 the value
	// 1 + 4*BX combined with the bits of SI above the low byte shifted to
	// bit 5.
	XORQ R8, R8
	LEAQ 1(R8)(BX*4), BX
	MOVB SI, 1(DI)
	SARL $0x08, SI
	SHLL $0x05, SI
	ORL SI, BX
	MOVB BL, (DI)
	ADDQ $0x02, DI
	JMP repeat_end_emit_encodeBlockAsm14B
repeat_as_copy_encodeBlockAsm14B:
	// Copy encoding of the same match. SI below 65536 takes the two-byte
	// offset forms. Otherwise a five-byte form with a 32-bit SI is written.
	CMPL SI, $0x00010000
	JL two_byte_offset_repeat_as_copy_encodeBlockAsm14B
	CMPL BX, $0x40
	JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm14B
	// Length above 64: write 0xff plus SI, take 64 off the length, and
	// encode the rest with the repeat forms unless fewer than 4 remain.
	MOVB $0xff, (DI)
	MOVD SI, 1(DI)
	LEAQ -64(BX), BX
	ADDQ $0x05, DI
	CMPL BX, $0x04
	JL four_bytes_remain_repeat_as_copy_encodeBlockAsm14B
emit_repeat_again_repeat_as_copy_encodeBlockAsm14B_emit_copy:
	// Same repeat encoder as emit_repeat_again_match_repeat_, duplicated
	// inline for this call site.
	MOVQ BX, R8
	LEAQ -4(BX), BX
	CMPL R8, $0x08
	JLE repeat_two_repeat_as_copy_encodeBlockAsm14B_emit_copy
	CMPL R8, $0x0c
	JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm14B_emit_copy
	CMPL SI, $0x00000800
	JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm14B_emit_copy
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm14B_emit_copy:
	CMPL BX, $0x00000104
	JLT repeat_three_repeat_as_copy_encodeBlockAsm14B_emit_copy
	CMPL BX, $0x00010100
	JLT repeat_four_repeat_as_copy_encodeBlockAsm14B_emit_copy
	CMPL BX, $0x0100ffff
	JLT repeat_five_repeat_as_copy_encodeBlockAsm14B_emit_copy
	LEAQ -16842747(BX), BX
	MOVW $0x001d, (DI)
	MOVW $0xfffb, 2(DI)
	MOVB $0xff, 4(DI)
	ADDQ $0x05, DI
	JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm14B_emit_copy
repeat_five_repeat_as_copy_encodeBlockAsm14B_emit_copy:
	LEAQ -65536(BX), BX
	MOVQ BX, SI
	MOVW $0x001d, (DI)
	MOVW BX, 2(DI)
	SARQ $0x10, SI
	MOVB SI, 4(DI)
	ADDQ $0x05, DI
	JMP repeat_end_emit_encodeBlockAsm14B
repeat_four_repeat_as_copy_encodeBlockAsm14B_emit_copy:
	LEAQ -256(BX), BX
	MOVW $0x0019, (DI)
	MOVW BX, 2(DI)
	ADDQ $0x04, DI
	JMP repeat_end_emit_encodeBlockAsm14B
repeat_three_repeat_as_copy_encodeBlockAsm14B_emit_copy:
	LEAQ -4(BX), BX
	MOVW $0x0015, (DI)
	MOVB BL, 2(DI)
	ADDQ $0x03, DI
	JMP repeat_end_emit_encodeBlockAsm14B
repeat_two_repeat_as_copy_encodeBlockAsm14B_emit_copy:
	SHLL $0x02, BX
	ORL $0x01, BX
	MOVW BX, (DI)
	ADDQ $0x02, DI
	JMP repeat_end_emit_encodeBlockAsm14B
repeat_two_offset_repeat_as_copy_encodeBlockAsm14B_emit_copy:
	XORQ R8, R8
	LEAQ 1(R8)(BX*4), BX
	MOVB SI, 1(DI)
	SARL $0x08, SI
	SHLL $0x05, SI
	ORL SI, BX
	MOVB BL, (DI)
	ADDQ $0x02, DI
	JMP repeat_end_emit_encodeBlockAsm14B
four_bytes_remain_repeat_as_copy_encodeBlockAsm14B:
	// Final five-byte copy: the first byte is 4*BX - 4 plus the tag loaded
	// into DL, followed by SI as 32 bits. Nothing is written when BX is zero.
	// NOTE(review): only DL is written before DX is used as the LEAQ base,
	// so the result relies on whatever the upper DX bits hold. Confirm.
	TESTL BX, BX
	JZ repeat_end_emit_encodeBlockAsm14B
	MOVB $0x03, DL
	LEAQ -4(DX)(BX*4), BX
	MOVB BL, (DI)
	MOVD SI, 1(DI)
	ADDQ $0x05, DI
	JMP repeat_end_emit_encodeBlockAsm14B
two_byte_offset_repeat_as_copy_encodeBlockAsm14B:
	// SI fits in 16 bits. Lengths above 64 write 0xee plus SI, take 60 off
	// the length, and continue with the repeat forms.
	CMPL BX, $0x40
	JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm14B
	MOVB $0xee, (DI)
	MOVW SI, 1(DI)
	LEAQ -60(BX), BX
	ADDQ $0x03, DI
emit_repeat_again_repeat_as_copy_encodeBlockAsm14B_emit_copy_short:
	// Another inline copy of the repeat encoder.
	MOVQ BX, R8
	LEAQ -4(BX), BX
	CMPL R8, $0x08
	JLE repeat_two_repeat_as_copy_encodeBlockAsm14B_emit_copy_short
	CMPL R8, $0x0c
	JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm14B_emit_copy_short
	CMPL SI, $0x00000800
	JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm14B_emit_copy_short
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm14B_emit_copy_short:
	CMPL BX, $0x00000104
	JLT repeat_three_repeat_as_copy_encodeBlockAsm14B_emit_copy_short
	CMPL BX, $0x00010100
	JLT repeat_four_repeat_as_copy_encodeBlockAsm14B_emit_copy_short
	CMPL BX, $0x0100ffff
	JLT repeat_five_repeat_as_copy_encodeBlockAsm14B_emit_copy_short
	LEAQ -16842747(BX), BX
	MOVW $0x001d, (DI)
	MOVW $0xfffb, 2(DI)
	MOVB $0xff, 4(DI)
	ADDQ $0x05, DI
	JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm14B_emit_copy_short
repeat_five_repeat_as_copy_encodeBlockAsm14B_emit_copy_short:
	LEAQ -65536(BX), BX
	MOVQ BX, SI
	MOVW $0x001d, (DI)
	MOVW BX, 2(DI)
	SARQ $0x10, SI
	MOVB SI, 4(DI)
	ADDQ $0x05, DI
	JMP repeat_end_emit_encodeBlockAsm14B
repeat_four_repeat_as_copy_encodeBlockAsm14B_emit_copy_short:
	LEAQ -256(BX), BX
	MOVW $0x0019, (DI)
	MOVW BX, 2(DI)
	ADDQ $0x04, DI
	JMP repeat_end_emit_encodeBlockAsm14B
repeat_three_repeat_as_copy_encodeBlockAsm14B_emit_copy_short:
	LEAQ -4(BX), BX
	MOVW $0x0015, (DI)
	MOVB BL, 2(DI)
	ADDQ $0x03, DI
	JMP repeat_end_emit_encodeBlockAsm14B
repeat_two_repeat_as_copy_encodeBlockAsm14B_emit_copy_short:
	SHLL $0x02, BX
	ORL $0x01, BX
	MOVW BX, (DI)
	ADDQ $0x02, DI
	JMP repeat_end_emit_encodeBlockAsm14B
repeat_two_offset_repeat_as_copy_encodeBlockAsm14B_emit_copy_short:
	XORQ R8, R8
	LEAQ 1(R8)(BX*4), BX
	MOVB SI, 1(DI)
	SARL $0x08, SI
	SHLL $0x05, SI
	ORL SI, BX
	MOVB BL, (DI)
	ADDQ $0x02, DI
	JMP repeat_end_emit_encodeBlockAsm14B
two_byte_offset_short_repeat_as_copy_encodeBlockAsm14B:
	// Length at most 64. The two-byte form needs length below 12 and SI
	// below 2048. Otherwise the three-byte form is used.
	CMPL BX, $0x0c
	JGE emit_copy_three_repeat_as_copy_encodeBlockAsm14B
	CMPL SI, $0x00000800
	JGE emit_copy_three_repeat_as_copy_encodeBlockAsm14B
	MOVB $0x01, DL
	LEAQ -16(DX)(BX*4), BX
	MOVB SI, 1(DI)
	SHRL $0x08, SI
	SHLL $0x05, SI
	ORL SI, BX
	MOVB BL, (DI)
	ADDQ $0x02, DI
	JMP repeat_end_emit_encodeBlockAsm14B
emit_copy_three_repeat_as_copy_encodeBlockAsm14B:
	// Three bytes: 4*BX - 4 plus the tag in DL, then SI as 16 bits.
	MOVB $0x02, DL
	LEAQ -4(DX)(BX*4), BX
	MOVB BL, (DI)
	MOVW SI, 1(DI)
	ADDQ $0x03, DI
repeat_end_emit_encodeBlockAsm14B:
	// Store the output cursor. Finish with the remainder when AX is past
	// 16(SP), otherwise resume searching.
	MOVQ DI, dst_base+0(FP)
	MOVL 16(SP), BX
	CMPL AX, BX
	JGT emit_remainder_encodeBlockAsm14B
	JMP search_loop_encodeBlockAsm14B
no_repeat_found_encodeBlockAsm14B:
	// No repeat at this position: probe the table candidates. R8 becomes a
	// table index computed from SI with its low 16 bits cleared, multiplied
	// by the constant in R9 and shifted right by 50.
	MOVQ $0x0000cf1bbcdcbf9b, R9
	MOVQ SI, R8
	SHRQ $0x10, R8
	SHLQ $0x10, R8
	IMULQ R9, R8
	SHRQ $0x32, R8
	// NOTE(review): SHRQ sits between CMPL and JEQ. x86 shifts by a non-zero
	// count rewrite ZF, so confirm which result this branch is meant to test.
	CMPL (CX)(BX*1), SI
	SHRQ $0x08, SI
	JEQ candidate_match_encodeBlockAsm14B
	MOVL 32(SP)(R8*1), BX
	CMPL (CX)(DI*1), SI
	JEQ candidate2_match_encodeBlockAsm14B
	// Record AX + 2 in the table slot and test the candidate read from it.
	LEAQ 2(AX), DI
	MOVL DI, 32(SP)(R8*1)
	SHRQ $0x08, SI
	CMPL (CX)(BX*1), SI
	JEQ candidate3_match_encodeBlockAsm14B
	// Nothing matched: continue the search at the position saved in 28(SP).
	MOVL 28(SP), AX
	JMP search_loop_encodeBlockAsm14B
candidate3_match_encodeBlockAsm14B:
	ADDL $0x02, AX
	JMP candidate_match_encodeBlockAsm14B
candidate2_match_encodeBlockAsm14B:
	// Store AX - 2 in the table slot, step AX by one and use DI as candidate.
	LEAQ -2(AX), BX
	MOVL BX, 32(SP)(R8*1)
	INCL AX
	MOVL DI, BX
candidate_match_encodeBlockAsm14B:
	// Backward extension: while the bytes before BX and AX are equal, step
	// both back. Stops when BX reaches zero or the AX versus 20(SP) test fires.
	MOVL 20(SP), SI
	TESTL BX, BX
	JZ match_extend_back_end_encodeBlockAsm14B
match_extend_back_loop_encodeBlockAsm14B:
	CMPL AX, SI
	JG match_extend_back_end_encodeBlockAsm14B
	MOVB -1(CX)(BX*1), DL
	MOVB -1(CX)(AX*1), DI
	// Continuation of match_extend_back_loop in encodeBlockAsm14B: DL and DI
	// hold the bytes just before the candidate (BX) and the position (AX).
CMPB DL, DI
	JNE match_extend_back_end_encodeBlockAsm14B
	LEAL -1(AX), AX
	DECL BX
	JZ match_extend_back_end_encodeBlockAsm14B
	JMP match_extend_back_loop_encodeBlockAsm14B
match_extend_back_end_encodeBlockAsm14B:
	// Output-space check before emitting AX - 20(SP) pending bytes. On
	// failure the function returns 0.
	// NOTE(review): LEAQ on dst_base+0(FP) yields the address of the argument
	// slot plus SI, not the pointer stored in it. Confirm this is the value
	// meant to be compared with the limit in (SP).
	MOVL AX, SI
	SUBL 20(SP), SI
	LEAQ dst_base+0(FP)(SI*1), SI
	CMPQ SI, (SP)
	JL match_dst_size_check_encodeBlockAsm14B
	MOVQ $0x00000000, ret+48(FP)
	RET
match_dst_size_check_encodeBlockAsm14B:
	// Literal emission on the match path. SI takes the value of BX and is
	// compared with 20(SP). When they are equal nothing is pending.
	MOVL BX, SI
	MOVL 20(SP), DI
	CMPL DI, SI
	JEQ emit_literal_skip_match_emit_encodeBlockAsm14B
	// R8 = SI - old 20(SP) is the byte count, SI = CX + old 20(SP) is the
	// source, DI is the output cursor read from the dst_base slot.
	MOVL SI, R8
	MOVL SI, 20(SP)
	LEAQ (CX)(DI*1), SI
	SUBL DI, R8
	MOVQ dst_base+0(FP), DI
	// R9 = count - 1. The subtraction borrows only when the count was zero.
	MOVQ R8, R9
	SUBL $0x01, R9
	JC emit_literal_done_match_emit_encodeBlockAsm14B
	// Header size from count-1: below 60 one byte, below 256 two, below
	// 65536 three, below 16777216 four, otherwise five.
	CMPL R9, $0x3c
	JLT one_byte_match_emit_encodeBlockAsm14B
	CMPL R9, $0x00000100
	JLT two_bytes_match_emit_encodeBlockAsm14B
	CMPL R9, $0x00010000
	JLT three_bytes_match_emit_encodeBlockAsm14B
	CMPL R9, $0x01000000
	JLT four_bytes_match_emit_encodeBlockAsm14B
	MOVB $0xfc, (DI)
	MOVL R9, 1(DI)
	ADDQ $0x05, DI
	JMP memmove_match_emit_encodeBlockAsm14B
four_bytes_match_emit_encodeBlockAsm14B:
	MOVQ R9, R10
	SHRL $0x10, R10
	MOVB $0xf8, (DI)
	MOVW R9, 1(DI)
	MOVB R10, 3(DI)
	ADDQ $0x04, DI
	JMP memmove_match_emit_encodeBlockAsm14B
three_bytes_match_emit_encodeBlockAsm14B:
	MOVB $0xf4, (DI)
	MOVW R9, 1(DI)
	ADDQ $0x03, DI
	JMP memmove_match_emit_encodeBlockAsm14B
two_bytes_match_emit_encodeBlockAsm14B:
	MOVB $0xf0, (DI)
	MOVB R9, 1(DI)
	ADDQ $0x02, DI
	JMP memmove_match_emit_encodeBlockAsm14B
one_byte_match_emit_encodeBlockAsm14B:
	SHLB $0x02, R9
	MOVB R9, (DI)
	ADDQ $0x01, DI
memmove_match_emit_encodeBlockAsm14B:
	// R9 = DI + R8, the address just past the bytes about to be copied.
	LEAQ (DI)(R8*1), R9
	NOP
emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_tail:
	// Dispatch on the remaining count in R8. Source is SI, destination is DI.
	TESTQ R8, R8
	JEQ emit_literal_done_match_emit_encodeBlockAsm14B
	CMPQ R8, $0x02
	JBE emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_1or2
	CMPQ R8, $0x04
	JB emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_3
	JBE emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_4
	CMPQ R8, $0x08
	JB emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_5through7
	JE emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_8
	CMPQ R8, $0x10
	JBE emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_9through16
	CMPQ R8, $0x20
	JBE emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_17through32
	CMPQ R8, $0x40
	JBE emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_33through64
	CMPQ R8, $0x80
	JBE emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_65through128
	CMPQ R8, $0x00000100
	JBE emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_129through256
	JMP emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_256through2048
	// Size-specialised copies. Each case loads a chunk from the start and a
	// chunk ending at the last byte (they may overlap) before storing.
emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_1or2:
	MOVB (SI), R9
	MOVB -1(SI)(R8*1), SI
	MOVB R9, (DI)
	MOVB SI, -1(DI)(R8*1)
	JMP emit_literal_done_match_emit_encodeBlockAsm14B
emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_4:
	MOVL (SI), R9
	MOVL R9, (DI)
	JMP emit_literal_done_match_emit_encodeBlockAsm14B
emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_3:
	MOVW (SI), R9
	MOVB 2(SI), SI
	MOVW R9, (DI)
	MOVB SI, 2(DI)
	JMP emit_literal_done_match_emit_encodeBlockAsm14B
emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_5through7:
	MOVL (SI), R9
	MOVL -4(SI)(R8*1), SI
	MOVL R9, (DI)
	MOVL SI, -4(DI)(R8*1)
	JMP emit_literal_done_match_emit_encodeBlockAsm14B
emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_8:
	MOVQ (SI), R9
	MOVQ R9, (DI)
	JMP emit_literal_done_match_emit_encodeBlockAsm14B
emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_9through16:
	MOVQ (SI), R9
	MOVQ -8(SI)(R8*1), SI
	MOVQ R9, (DI)
	MOVQ SI, -8(DI)(R8*1)
	JMP emit_literal_done_match_emit_encodeBlockAsm14B
emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_17through32:
	MOVOU (SI), X0
	MOVOU -16(SI)(R8*1), X1
	MOVOU X0, (DI)
	MOVOU X1, -16(DI)(R8*1)
	JMP emit_literal_done_match_emit_encodeBlockAsm14B
emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_33through64:
	MOVOU (SI), X0
	MOVOU 16(SI), X1
	MOVOU -32(SI)(R8*1), X2
	MOVOU -16(SI)(R8*1), X3
	MOVOU X0, (DI)
	MOVOU X1, 16(DI)
	MOVOU X2, -32(DI)(R8*1)
	MOVOU X3, -16(DI)(R8*1)
	JMP emit_literal_done_match_emit_encodeBlockAsm14B
emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_65through128:
	MOVOU (SI), X0
	MOVOU 16(SI), X1
	MOVOU 32(SI), X2
	MOVOU 48(SI), X3
	MOVOU -64(SI)(R8*1), X12
	MOVOU -48(SI)(R8*1), X13
	MOVOU -32(SI)(R8*1), X14
	MOVOU -16(SI)(R8*1), X15
	MOVOU X0, (DI)
	MOVOU X1, 16(DI)
	MOVOU X2, 32(DI)
	MOVOU X3, 48(DI)
	MOVOU X12, -64(DI)(R8*1)
	MOVOU X13, -48(DI)(R8*1)
	MOVOU X14, -32(DI)(R8*1)
	MOVOU X15, -16(DI)(R8*1)
	JMP emit_literal_done_match_emit_encodeBlockAsm14B
emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_129through256:
	MOVOU (SI), X0
	MOVOU 16(SI), X1
	MOVOU 32(SI), X2
	MOVOU 48(SI), X3
	MOVOU 64(SI), X4
	MOVOU 80(SI), X5
	MOVOU 96(SI), X6
	MOVOU 112(SI), X7
	MOVOU -128(SI)(R8*1), X8
	MOVOU -112(SI)(R8*1), X9
	MOVOU -96(SI)(R8*1), X10
	MOVOU -80(SI)(R8*1), X11
	MOVOU -64(SI)(R8*1), X12
	MOVOU -48(SI)(R8*1), X13
	MOVOU -32(SI)(R8*1), X14
	MOVOU -16(SI)(R8*1), X15
	MOVOU X0, (DI)
	MOVOU X1, 16(DI)
	MOVOU X2, 32(DI)
	MOVOU X3, 48(DI)
	MOVOU X4, 64(DI)
	MOVOU X5, 80(DI)
	MOVOU X6, 96(DI)
	MOVOU X7, 112(DI)
	MOVOU X8, -128(DI)(R8*1)
	MOVOU X9, -112(DI)(R8*1)
	MOVOU X10, -96(DI)(R8*1)
	MOVOU X11, -80(DI)(R8*1)
	MOVOU X12, -64(DI)(R8*1)
	MOVOU X13, -48(DI)(R8*1)
	MOVOU X14, -32(DI)(R8*1)
	MOVOU X15, -16(DI)(R8*1)
	JMP emit_literal_done_match_emit_encodeBlockAsm14B
emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_256through2048:
	// Bulk loop: copy 256 bytes per pass through X0-X15 and reduce R8 by 256.
	LEAQ -256(R8), R8
	MOVOU (SI), X0
	MOVOU 16(SI), X1
	MOVOU 32(SI), X2
	MOVOU 48(SI), X3
	MOVOU 64(SI), X4
	MOVOU 80(SI), X5
	MOVOU 96(SI), X6
	MOVOU 112(SI), X7
	MOVOU 128(SI), X8
	MOVOU 144(SI), X9
	MOVOU 160(SI), X10
	MOVOU 176(SI), X11
	MOVOU 192(SI), X12
	MOVOU 208(SI), X13
	MOVOU 224(SI), X14
	MOVOU 240(SI), X15
	MOVOU X0, (DI)
	MOVOU X1, 16(DI)
	MOVOU X2, 32(DI)
	MOVOU X3, 48(DI)
	MOVOU X4, 64(DI)
	MOVOU X5, 80(DI)
	MOVOU X6, 96(DI)
	MOVOU X7, 112(DI)
	MOVOU X8, 128(DI)
	MOVOU X9, 144(DI)
	MOVOU X10, 160(DI)
	MOVOU X11, 176(DI)
	MOVOU X12, 192(DI)
	MOVOU X13, 208(DI)
	MOVOU X14, 224(DI)
	MOVOU X15, 240(DI)
	// LEAQ leaves the flags from CMPQ intact for the JGE below.
	CMPQ R8, $0x00000100
	LEAQ 256(SI), SI
	LEAQ 256(DI), DI
	JGE emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_256through2048
	JMP emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_tail
	// NOTE(review): unlabelled instruction after an unconditional JMP, so
	// nothing reaches it as written and DI is stored below without being
	// set to R9. Confirm against the generator.
	MOVQ R9, DI
emit_literal_done_match_emit_encodeBlockAsm14B:
	MOVQ DI, dst_base+0(FP)
emit_literal_skip_match_emit_encodeBlockAsm14B:
	NOP
match_nolit_loop_encodeBlockAsm14B:
	// Match with no pending literals. 24(SP) = AX - BX is stored as the
	// offset, then both positions step past the first four bytes.
	// (The first MOVL AX, SI is repeated verbatim in the generated code.)
	MOVL AX, SI
	MOVL AX, SI
	SUBL BX, SI
	MOVL SI, 24(SP)
	ADDL $0x04, AX
	ADDL $0x04, BX
	// SI = 16(SP) - AX bounds the comparison loops. R8 counts matched bytes.
	MOVL 16(SP), SI
	SUBL AX, SI
	XORQ R8, R8
	CMPQ SI, $0x08
	JL matchlen_single_match_nolit_encodeBlockAsm14B
matchlen_loopback_match_nolit_encodeBlockAsm14B:
	// NOTE(review): the load and the XOR address the same location, so DI is
	// always zero here and the BSFQ branch cannot be taken. Confirm against
	// the generator.
	MOVQ (CX)(R8*1), DI
	XORQ (CX)(R8*1), DI
	TESTQ DI, DI
	JZ matchlen_loop_match_nolit_encodeBlockAsm14B
	BSFQ DI, DI
	SARQ $0x03, DI
	LEAQ (R8)(DI*1), R8
	JMP match_nolit_end_encodeBlockAsm14B
matchlen_loop_match_nolit_encodeBlockAsm14B:
	LEAQ -8(SI), SI
	LEAQ 8(R8), R8
	CMPQ SI, $0x08
	JGE matchlen_loopback_match_nolit_encodeBlockAsm14B
matchlen_single_match_nolit_encodeBlockAsm14B:
	TESTQ SI, SI
	JZ match_nolit_end_encodeBlockAsm14B
matchlen_single_loopback_match_nolit_encodeBlockAsm14B:
	// NOTE(review): the load and the compare use the same address here too.
	MOVB (CX)(R8*1), DI
	CMPB (CX)(R8*1), DI
	JNE match_nolit_end_encodeBlockAsm14B
	LEAQ 1(R8), R8
	DECQ SI
	JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm14B
match_nolit_end_encodeBlockAsm14B:
	// SI = offset from 24(SP), R8 = measured length + 4, DI = output cursor.
	// AX advances by R8.
	MOVL 24(SP), SI
	ADDQ $0x04, R8
	MOVQ dst_base+0(FP), DI
	ADDL R8, AX
	// Copy encoding: offsets below 65536 take the two-byte offset forms.
	CMPL SI, $0x00010000
	JL two_byte_offset_match_nolit_encodeBlockAsm14B
	CMPL R8, $0x40
	JLE four_bytes_remain_match_nolit_encodeBlockAsm14B
	// Length above 64: write 0xff plus SI as 32 bits, take 64 off the length.
	MOVB $0xff, (DI)
	MOVD SI, 1(DI)
	LEAQ -64(R8), R8
	ADDQ $0x05, DI
	CMPL R8, $0x04
	JL four_bytes_remain_match_nolit_encodeBlockAsm14B
emit_repeat_again_match_nolit_encodeBlockAsm14B_emit_copy:
	// Repeat encoder for the rest. R9 keeps the full length and R8 becomes
	// length - 4.
	MOVQ R8, R9
	LEAQ -4(R8), R8
	CMPL R9, $0x08
	JLE repeat_two_match_nolit_encodeBlockAsm14B_emit_copy
	CMPL R9, $0x0c
	JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm14B_emit_copy
	CMPL SI, $0x00000800
	// The target label of this JLT is the first token of the next source line.
	JLT
repeat_two_offset_match_nolit_encodeBlockAsm14B_emit_copy
	// The label above is the operand of the JLT that ends the previous source
	// line. This span continues the repeat encoder of encodeBlockAsm14B:
	// R8 = length - 4, SI = offset, DI = output cursor.
cant_repeat_two_offset_match_nolit_encodeBlockAsm14B_emit_copy:
	CMPL R8, $0x00000104
	JLT repeat_three_match_nolit_encodeBlockAsm14B_emit_copy
	CMPL R8, $0x00010100
	JLT repeat_four_match_nolit_encodeBlockAsm14B_emit_copy
	CMPL R8, $0x0100ffff
	JLT repeat_five_match_nolit_encodeBlockAsm14B_emit_copy
	// Too long for one five-byte form: write a fixed chunk and loop.
	LEAQ -16842747(R8), R8
	MOVW $0x001d, (DI)
	MOVW $0xfffb, 2(DI)
	MOVB $0xff, 4(DI)
	ADDQ $0x05, DI
	JMP emit_repeat_again_match_nolit_encodeBlockAsm14B_emit_copy
repeat_five_match_nolit_encodeBlockAsm14B_emit_copy:
	LEAQ -65536(R8), R8
	MOVQ R8, SI
	MOVW $0x001d, (DI)
	MOVW R8, 2(DI)
	SARQ $0x10, SI
	MOVB SI, 4(DI)
	ADDQ $0x05, DI
	JMP match_nolit_emitcopy_end_encodeBlockAsm14B
repeat_four_match_nolit_encodeBlockAsm14B_emit_copy:
	LEAQ -256(R8), R8
	MOVW $0x0019, (DI)
	MOVW R8, 2(DI)
	ADDQ $0x04, DI
	JMP match_nolit_emitcopy_end_encodeBlockAsm14B
repeat_three_match_nolit_encodeBlockAsm14B_emit_copy:
	LEAQ -4(R8), R8
	MOVW $0x0015, (DI)
	MOVB R8, 2(DI)
	ADDQ $0x03, DI
	JMP match_nolit_emitcopy_end_encodeBlockAsm14B
repeat_two_match_nolit_encodeBlockAsm14B_emit_copy:
	SHLL $0x02, R8
	ORL $0x01, R8
	MOVW R8, (DI)
	ADDQ $0x02, DI
	JMP match_nolit_emitcopy_end_encodeBlockAsm14B
repeat_two_offset_match_nolit_encodeBlockAsm14B_emit_copy:
	XORQ R9, R9
	LEAQ 1(R9)(R8*4), R8
	MOVB SI, 1(DI)
	SARL $0x08, SI
	SHLL $0x05, SI
	ORL SI, R8
	MOVB R8, (DI)
	ADDQ $0x02, DI
	JMP match_nolit_emitcopy_end_encodeBlockAsm14B
four_bytes_remain_match_nolit_encodeBlockAsm14B:
	// Final five-byte copy: 4*R8 - 4 plus the tag in DL, then SI as 32 bits.
	// Nothing is written when R8 is zero.
	// NOTE(review): only DL is written before DX is used as the LEAQ base.
	TESTL R8, R8
	JZ match_nolit_emitcopy_end_encodeBlockAsm14B
	MOVB $0x03, DL
	LEAQ -4(DX)(R8*4), R8
	MOVB R8, (DI)
	MOVD SI, 1(DI)
	ADDQ $0x05, DI
	JMP match_nolit_emitcopy_end_encodeBlockAsm14B
two_byte_offset_match_nolit_encodeBlockAsm14B:
	// Offset fits in 16 bits. Lengths above 64 write 0xee plus SI, take 60
	// off the length, and continue with the repeat forms.
	CMPL R8, $0x40
	JLE two_byte_offset_short_match_nolit_encodeBlockAsm14B
	MOVB $0xee, (DI)
	MOVW SI, 1(DI)
	LEAQ -60(R8), R8
	ADDQ $0x03, DI
emit_repeat_again_match_nolit_encodeBlockAsm14B_emit_copy_short:
	MOVQ R8, R9
	LEAQ -4(R8), R8
	CMPL R9, $0x08
	JLE repeat_two_match_nolit_encodeBlockAsm14B_emit_copy_short
	CMPL R9, $0x0c
	JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm14B_emit_copy_short
	CMPL SI, $0x00000800
	JLT repeat_two_offset_match_nolit_encodeBlockAsm14B_emit_copy_short
cant_repeat_two_offset_match_nolit_encodeBlockAsm14B_emit_copy_short:
	CMPL R8, $0x00000104
	JLT repeat_three_match_nolit_encodeBlockAsm14B_emit_copy_short
	CMPL R8, $0x00010100
	JLT repeat_four_match_nolit_encodeBlockAsm14B_emit_copy_short
	CMPL R8, $0x0100ffff
	JLT repeat_five_match_nolit_encodeBlockAsm14B_emit_copy_short
	LEAQ -16842747(R8), R8
	MOVW $0x001d, (DI)
	MOVW $0xfffb, 2(DI)
	MOVB $0xff, 4(DI)
	ADDQ $0x05, DI
	JMP emit_repeat_again_match_nolit_encodeBlockAsm14B_emit_copy_short
repeat_five_match_nolit_encodeBlockAsm14B_emit_copy_short:
	LEAQ -65536(R8), R8
	MOVQ R8, SI
	MOVW $0x001d, (DI)
	MOVW R8, 2(DI)
	SARQ $0x10, SI
	MOVB SI, 4(DI)
	ADDQ $0x05, DI
	JMP match_nolit_emitcopy_end_encodeBlockAsm14B
repeat_four_match_nolit_encodeBlockAsm14B_emit_copy_short:
	LEAQ -256(R8), R8
	MOVW $0x0019, (DI)
	MOVW R8, 2(DI)
	ADDQ $0x04, DI
	JMP match_nolit_emitcopy_end_encodeBlockAsm14B
repeat_three_match_nolit_encodeBlockAsm14B_emit_copy_short:
	LEAQ -4(R8), R8
	MOVW $0x0015, (DI)
	MOVB R8, 2(DI)
	ADDQ $0x03, DI
	JMP match_nolit_emitcopy_end_encodeBlockAsm14B
repeat_two_match_nolit_encodeBlockAsm14B_emit_copy_short:
	SHLL $0x02, R8
	ORL $0x01, R8
	MOVW R8, (DI)
	ADDQ $0x02, DI
	JMP match_nolit_emitcopy_end_encodeBlockAsm14B
repeat_two_offset_match_nolit_encodeBlockAsm14B_emit_copy_short:
	XORQ R9, R9
	LEAQ 1(R9)(R8*4), R8
	MOVB SI, 1(DI)
	SARL $0x08, SI
	SHLL $0x05, SI
	ORL SI, R8
	MOVB R8, (DI)
	ADDQ $0x02, DI
	JMP match_nolit_emitcopy_end_encodeBlockAsm14B
two_byte_offset_short_match_nolit_encodeBlockAsm14B:
	// Length at most 64. The two-byte form needs length below 12 and offset
	// below 2048. Otherwise the three-byte form is used.
	CMPL R8, $0x0c
	JGE emit_copy_three_match_nolit_encodeBlockAsm14B
	CMPL SI, $0x00000800
	JGE emit_copy_three_match_nolit_encodeBlockAsm14B
	MOVB $0x01, DL
	LEAQ -16(DX)(R8*4), R8
	MOVB SI, 1(DI)
	SHRL $0x08, SI
	SHLL $0x05, SI
	ORL SI, R8
	MOVB R8, (DI)
	ADDQ $0x02, DI
	JMP match_nolit_emitcopy_end_encodeBlockAsm14B
emit_copy_three_match_nolit_encodeBlockAsm14B:
	MOVB $0x02, DL
	LEAQ -4(DX)(R8*4), R8
	MOVB R8, (DI)
	MOVW SI, 1(DI)
	ADDQ $0x03, DI
match_nolit_emitcopy_end_encodeBlockAsm14B:
	// Store the output cursor and set 20(SP) = AX. Finish with the remainder
	// when AX has reached 16(SP). Return 0 when DI is not below the limit
	// held in (SP).
	MOVQ DI, dst_base+0(FP)
	MOVL AX, 20(SP)
	CMPL AX, 16(SP)
	JGE emit_remainder_encodeBlockAsm14B
	CMPQ DI, (SP)
	JL match_nolit_dst_ok_encodeBlockAsm14B
	MOVQ $0x00000000, ret+48(FP)
	RET
match_nolit_dst_ok_encodeBlockAsm14B:
	// Refresh two table slots from the eight bytes at AX - 2. R8 indexes the
	// slot for the loaded value and receives AX - 2. R9 indexes the slot for
	// the value shifted right by 16 and receives AX.
	MOVQ -2(CX)(AX*1), SI
	MOVQ $0x0000cf1bbcdcbf9b, DI
	MOVQ SI, R8
	SHRQ $0x10, SI
	MOVQ SI, R9
	SHLQ $0x10, R8
	IMULQ DI, R8
	SHRQ $0x32, R8
	SHLQ $0x10, R9
	IMULQ DI, R9
	SHRQ $0x32, R9
	// NOTE(review): DI is loaded twice and then overwritten by the LEAQ, so
	// neither table value read here is used afterwards. Confirm.
	MOVL 32(SP)(R8*1), DI
	MOVL 32(SP)(R9*1), DI
	LEAQ -2(AX), DI
	MOVL DI, 32(SP)(R8*1)
	MOVL AX, 32(SP)(R9*1)
	// NOTE(review): the compare indexes src with R9, which holds the table
	// index computed above. Confirm against the generator.
	CMPL (CX)(R9*1), SI
	JEQ match_nolit_loop_encodeBlockAsm14B
	INCL AX
	JMP search_loop_encodeBlockAsm14B
emit_remainder_encodeBlockAsm14B:
	// Trailing bytes from 20(SP) to src_len. Return 0 when the output cursor
	// plus that count is not below the limit held in (SP).
	MOVQ src_len+32(FP), AX
	SUBL 20(SP), AX
	MOVQ dst_base+0(FP), DX
	LEAQ (DX)(AX*1), DX
	CMPQ DX, (SP)
	JL emit_remainder_ok_encodeBlockAsm14B
	MOVQ $0x00000000, ret+48(FP)
	RET
emit_remainder_ok_encodeBlockAsm14B:
	// Nothing to emit when 20(SP) already equals src_len.
	MOVQ src_len+32(FP), AX
	MOVL 20(SP), DX
	CMPL DX, AX
	JEQ emit_literal_skip_emit_remainder_encodeBlockAsm14B
	// BX = src_len - old 20(SP) is the byte count, AX = src + old 20(SP) is
	// the source, CX is reused as the output cursor, DX = count - 1.
	MOVL AX, BX
	MOVL AX, 20(SP)
	LEAQ (CX)(DX*1), AX
	SUBL DX, BX
	MOVQ dst_base+0(FP), CX
	MOVQ BX, DX
	SUBL $0x01, DX
	JC emit_literal_done_emit_remainder_encodeBlockAsm14B
	// Header size from count-1: below 60 one byte, below 256 two, below
	// 65536 three, below 16777216 four, otherwise five.
	CMPL DX, $0x3c
	JLT one_byte_emit_remainder_encodeBlockAsm14B
	CMPL DX, $0x00000100
	JLT two_bytes_emit_remainder_encodeBlockAsm14B
	CMPL DX, $0x00010000
	JLT three_bytes_emit_remainder_encodeBlockAsm14B
	CMPL DX, $0x01000000
	JLT four_bytes_emit_remainder_encodeBlockAsm14B
	MOVB $0xfc, (CX)
	MOVL DX, 1(CX)
	ADDQ $0x05, CX
	JMP memmove_emit_remainder_encodeBlockAsm14B
four_bytes_emit_remainder_encodeBlockAsm14B:
	MOVQ DX, SI
	SHRL $0x10, SI
	MOVB $0xf8, (CX)
	MOVW DX, 1(CX)
	MOVB SI, 3(CX)
	ADDQ $0x04, CX
	JMP memmove_emit_remainder_encodeBlockAsm14B
three_bytes_emit_remainder_encodeBlockAsm14B:
	MOVB $0xf4, (CX)
	MOVW DX, 1(CX)
	ADDQ $0x03, CX
	// The target label of this JMP is the first token of the next source line.
	JMP
memmove_emit_remainder_encodeBlockAsm14B
	// The label above is the operand of the JMP that ends the previous source
	// line. This span finishes the remainder header in encodeBlockAsm14B and
	// starts the copy: CX = output cursor, DX = count - 1, BX = count,
	// AX = source pointer.
two_bytes_emit_remainder_encodeBlockAsm14B:
	// Two-byte header: tag 0xf0 followed by count-1 as one byte.
	MOVB $0xf0, (CX)
	MOVB DL, 1(CX)
	ADDQ $0x02, CX
	JMP memmove_emit_remainder_encodeBlockAsm14B
one_byte_emit_remainder_encodeBlockAsm14B:
	// One-byte header: (count-1) shifted left by two.
	SHLB $0x02, DL
	MOVB DL, (CX)
	ADDQ $0x01, CX
memmove_emit_remainder_encodeBlockAsm14B:
	// DX = CX + BX, the address just past the bytes about to be copied.
	LEAQ (CX)(BX*1), DX
	NOP
emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_tail:
	// Dispatch on the remaining count in BX. Source is AX, destination is CX.
	TESTQ BX, BX
	JEQ emit_literal_done_emit_remainder_encodeBlockAsm14B
	CMPQ BX, $0x02
	JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_1or2
	CMPQ BX, $0x04
	JB emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_3
	JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_4
	CMPQ BX, $0x08
	JB emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_5through7
	JE emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_8
	CMPQ BX, $0x10
	JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_9through16
	CMPQ BX, $0x20
	JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_17through32
	CMPQ BX, $0x40
	JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_33through64
	CMPQ BX, $0x80
	JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_65through128
	CMPQ BX, $0x00000100
	JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_129through256
	JMP emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_256through2048
	// Size-specialised copies. Each case loads a chunk from the start and a
	// chunk ending at the last byte (they may overlap) before storing. The
	// small cases reuse DX and AX as scratch once the source has been read.
emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_1or2:
	MOVB (AX), DL
	MOVB -1(AX)(BX*1), AL
	MOVB DL, (CX)
	MOVB AL, -1(CX)(BX*1)
	JMP emit_literal_done_emit_remainder_encodeBlockAsm14B
emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_4:
	MOVL (AX), DX
	MOVL DX, (CX)
	JMP emit_literal_done_emit_remainder_encodeBlockAsm14B
emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_3:
	MOVW (AX), DX
	MOVB 2(AX), AL
	MOVW DX, (CX)
	MOVB AL, 2(CX)
	JMP emit_literal_done_emit_remainder_encodeBlockAsm14B
emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_5through7:
	MOVL (AX), DX
	MOVL -4(AX)(BX*1), AX
	MOVL DX, (CX)
	MOVL AX, -4(CX)(BX*1)
	JMP emit_literal_done_emit_remainder_encodeBlockAsm14B
emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_8:
	MOVQ (AX), DX
	MOVQ DX, (CX)
	JMP emit_literal_done_emit_remainder_encodeBlockAsm14B
emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_9through16:
	MOVQ (AX), DX
	MOVQ -8(AX)(BX*1), AX
	MOVQ DX, (CX)
	MOVQ AX, -8(CX)(BX*1)
	JMP emit_literal_done_emit_remainder_encodeBlockAsm14B
emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_17through32:
	MOVOU (AX), X0
	MOVOU -16(AX)(BX*1), X1
	MOVOU X0, (CX)
	MOVOU X1, -16(CX)(BX*1)
	JMP emit_literal_done_emit_remainder_encodeBlockAsm14B
emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_33through64:
	MOVOU (AX), X0
	MOVOU 16(AX), X1
	MOVOU -32(AX)(BX*1), X2
	MOVOU -16(AX)(BX*1), X3
	MOVOU X0, (CX)
	MOVOU X1, 16(CX)
	MOVOU X2, -32(CX)(BX*1)
	MOVOU X3, -16(CX)(BX*1)
	JMP emit_literal_done_emit_remainder_encodeBlockAsm14B
emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_65through128:
	MOVOU (AX), X0
	MOVOU 16(AX), X1
	MOVOU 32(AX), X2
	MOVOU 48(AX), X3
	MOVOU -64(AX)(BX*1), X12
	MOVOU -48(AX)(BX*1), X13
	MOVOU -32(AX)(BX*1), X14
	MOVOU -16(AX)(BX*1), X15
	MOVOU X0, (CX)
	MOVOU X1, 16(CX)
	MOVOU X2, 32(CX)
	MOVOU X3, 48(CX)
	MOVOU X12, -64(CX)(BX*1)
	MOVOU X13, -48(CX)(BX*1)
	MOVOU X14, -32(CX)(BX*1)
	MOVOU X15, -16(CX)(BX*1)
	JMP emit_literal_done_emit_remainder_encodeBlockAsm14B
emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_129through256:
	// Sixteen loads, then sixteen stores. The stores continue on the next
	// source line.
	MOVOU (AX), X0
	MOVOU 16(AX), X1
	MOVOU 32(AX), X2
	MOVOU 48(AX), X3
	MOVOU 64(AX), X4
	MOVOU 80(AX), X5
	MOVOU 96(AX), X6
	MOVOU 112(AX), X7
	MOVOU -128(AX)(BX*1), X8
	MOVOU -112(AX)(BX*1), X9
	MOVOU -96(AX)(BX*1), X10
	MOVOU -80(AX)(BX*1), X11
	MOVOU -64(AX)(BX*1), X12
	MOVOU -48(AX)(BX*1), X13
	MOVOU -32(AX)(BX*1), X14
	MOVOU -16(AX)(BX*1), X15
	MOVOU X0, (CX)
	MOVOU X1, 16(CX)
	MOVOU X2, 32(CX)
	MOVOU X3, 48(CX)
MOVOU X4, 64(CX) MOVOU X5, 80(CX) MOVOU X6, 96(CX) MOVOU X7, 112(CX) MOVOU X8, -128(CX)(BX*1) MOVOU X9, -112(CX)(BX*1) MOVOU X10, -96(CX)(BX*1) MOVOU X11, -80(CX)(BX*1) MOVOU X12, -64(CX)(BX*1) MOVOU X13, -48(CX)(BX*1) MOVOU X14, -32(CX)(BX*1) MOVOU X15, -16(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm14B emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_256through2048: LEAQ -256(BX), BX MOVOU (AX), X0 MOVOU 16(AX), X1 MOVOU 32(AX), X2 MOVOU 48(AX), X3 MOVOU 64(AX), X4 MOVOU 80(AX), X5 MOVOU 96(AX), X6 MOVOU 112(AX), X7 MOVOU 128(AX), X8 MOVOU 144(AX), X9 MOVOU 160(AX), X10 MOVOU 176(AX), X11 MOVOU 192(AX), X12 MOVOU 208(AX), X13 MOVOU 224(AX), X14 MOVOU 240(AX), X15 MOVOU X0, (CX) MOVOU X1, 16(CX) MOVOU X2, 32(CX) MOVOU X3, 48(CX) MOVOU X4, 64(CX) MOVOU X5, 80(CX) MOVOU X6, 96(CX) MOVOU X7, 112(CX) MOVOU X8, 128(CX) MOVOU X9, 144(CX) MOVOU X10, 160(CX) MOVOU X11, 176(CX) MOVOU X12, 192(CX) MOVOU X13, 208(CX) MOVOU X14, 224(CX) MOVOU X15, 240(CX) CMPQ BX, $0x00000100 LEAQ 256(AX), AX LEAQ 256(CX), CX JGE emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_256through2048 JMP emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_tail MOVQ DX, CX emit_literal_done_emit_remainder_encodeBlockAsm14B: MOVQ CX, dst_base+0(FP) emit_literal_skip_emit_remainder_encodeBlockAsm14B: MOVQ 8(SP), AX SUBQ dst_base+0(FP), AX MOVQ AX, ret+48(FP) RET // func encodeBlockAsm12B(dst []byte, src []byte) int // Requires: SSE2 TEXT ·encodeBlockAsm12B(SB), $4128-56 MOVQ $0x00000020, AX LEAQ 32(SP), CX PXOR X0, X0 zero_loop_encodeBlockAsm12B: MOVOU X0, (CX) MOVOU X0, 16(CX) MOVOU X0, 32(CX) MOVOU X0, 48(CX) MOVOU X0, 64(CX) MOVOU X0, 80(CX) MOVOU X0, 96(CX) MOVOU X0, 112(CX) ADDQ $0x80, CX DECQ AX JNZ zero_loop_encodeBlockAsm12B MOVL AX, 20(SP) MOVQ src_len+32(FP), AX LEAQ -5(AX), CX LEAQ -8(AX), BX SHRQ $0x05, AX SUBL AX, CX MOVL BX, 16(SP) MOVQ dst_base+0(FP), AX MOVQ AX, 8(SP) LEAQ (AX)(CX*1), CX MOVQ CX, (SP) MOVL $0x00000001, AX MOVL AX, 
24(SP) MOVQ src_base+24(FP), CX search_loop_encodeBlockAsm12B: MOVQ (CX)(AX*1), SI MOVL AX, BX SUBL 20(SP), BX SHRL $0x04, BX LEAQ 4(AX)(BX*1), BX MOVL 16(SP), DI CMPL BX, DI JGT emit_remainder_encodeBlockAsm12B MOVL BX, 28(SP) MOVQ $0x0000cf1bbcdcbf9b, BX MOVQ SI, R8 MOVQ SI, R9 SHRQ $0x08, R9 SHLQ $0x10, R8 IMULQ BX, R8 SHRQ $0x34, R8 SHLQ $0x10, R9 IMULQ BX, R9 SHRQ $0x34, R9 MOVL 32(SP)(R8*1), BX MOVL 32(SP)(R9*1), DI MOVL AX, 32(SP)(R8*1) LEAL 1(AX), R8 MOVL R8, 32(SP)(R9*1) MOVL AX, R8 SUBL 24(SP), R8 MOVL 1(CX)(R8*1), R10 MOVQ SI, R9 SHLQ $0x08, R9 CMPL R9, R10 JNE no_repeat_found_encodeBlockAsm12B LEAQ 1(AX), SI MOVL 20(SP), BX TESTL R8, R8 JZ repeat_extend_back_end_encodeBlockAsm12B repeat_extend_back_loop_encodeBlockAsm12B: CMPL SI, BX JG repeat_extend_back_end_encodeBlockAsm12B MOVB -1(CX)(R8*1), DL MOVB -1(CX)(SI*1), DI CMPB DL, DI JNE repeat_extend_back_end_encodeBlockAsm12B LEAQ -1(SI), SI DECL R8 JZ repeat_extend_back_end_encodeBlockAsm12B JMP repeat_extend_back_loop_encodeBlockAsm12B repeat_extend_back_end_encodeBlockAsm12B: MOVL 20(SP), BX CMPL BX, SI JEQ emit_literal_skip_repeat_emit_encodeBlockAsm12B MOVL SI, DI MOVL SI, 20(SP) LEAQ (CX)(BX*1), R8 SUBL BX, DI MOVQ dst_base+0(FP), BX MOVQ DI, R9 SUBL $0x01, R9 JC emit_literal_done_repeat_emit_encodeBlockAsm12B CMPL R9, $0x3c JLT one_byte_repeat_emit_encodeBlockAsm12B CMPL R9, $0x00000100 JLT two_bytes_repeat_emit_encodeBlockAsm12B CMPL R9, $0x00010000 JLT three_bytes_repeat_emit_encodeBlockAsm12B CMPL R9, $0x01000000 JLT four_bytes_repeat_emit_encodeBlockAsm12B MOVB $0xfc, (BX) MOVL R9, 1(BX) ADDQ $0x05, BX JMP memmove_repeat_emit_encodeBlockAsm12B four_bytes_repeat_emit_encodeBlockAsm12B: MOVQ R9, R10 SHRL $0x10, R10 MOVB $0xf8, (BX) MOVW R9, 1(BX) MOVB R10, 3(BX) ADDQ $0x04, BX JMP memmove_repeat_emit_encodeBlockAsm12B three_bytes_repeat_emit_encodeBlockAsm12B: MOVB $0xf4, (BX) MOVW R9, 1(BX) ADDQ $0x03, BX JMP memmove_repeat_emit_encodeBlockAsm12B two_bytes_repeat_emit_encodeBlockAsm12B: MOVB 
$0xf0, (BX) MOVB R9, 1(BX) ADDQ $0x02, BX JMP memmove_repeat_emit_encodeBlockAsm12B one_byte_repeat_emit_encodeBlockAsm12B: SHLB $0x02, R9 MOVB R9, (BX) ADDQ $0x01, BX memmove_repeat_emit_encodeBlockAsm12B: LEAQ (BX)(DI*1), R9 NOP emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_tail: TESTQ DI, DI JEQ emit_literal_done_repeat_emit_encodeBlockAsm12B CMPQ DI, $0x02 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_1or2 CMPQ DI, $0x04 JB emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_3 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_4 CMPQ DI, $0x08 JB emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_5through7 JE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8 CMPQ DI, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_9through16 CMPQ DI, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32 CMPQ DI, $0x40 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64 CMPQ DI, $0x80 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_65through128 CMPQ DI, $0x00000100 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_129through256 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_256through2048 emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_1or2: MOVB (R8), R9 MOVB -1(R8)(DI*1), R8 MOVB R9, (BX) MOVB R8, -1(BX)(DI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_4: MOVL (R8), R9 MOVL R9, (BX) JMP emit_literal_done_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_3: MOVW (R8), R9 MOVB 2(R8), R8 MOVW R9, (BX) MOVB R8, 2(BX) JMP emit_literal_done_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_5through7: MOVL (R8), R9 MOVL -4(R8)(DI*1), R8 MOVL R9, (BX) MOVL R8, -4(BX)(DI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsm12B 
// encodeBlockAsm12B, repeat path: literal copy for runs of 8 bytes and up,
// then the forward match-length scan, then selection of the repeat/copy tag.
// In the copy cases R8 is the read pointer, BX the write pointer and DI the
// byte count, used to address the tail of the run.
emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8:
	MOVQ (R8), R9
	MOVQ R9, (BX)
	JMP emit_literal_done_repeat_emit_encodeBlockAsm12B

emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_9through16:
	// First 8 and last 8 bytes. Both loads finish before either store, and
	// the second load reuses R8 as its destination.
	MOVQ (R8), R9
	MOVQ -8(R8)(DI*1), R8
	MOVQ R9, (BX)
	MOVQ R8, -8(BX)(DI*1)
	JMP emit_literal_done_repeat_emit_encodeBlockAsm12B

emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32:
	// First 16 and last 16 bytes, unaligned.
	MOVOU (R8), X0
	MOVOU -16(R8)(DI*1), X1
	MOVOU X0, (BX)
	MOVOU X1, -16(BX)(DI*1)
	JMP emit_literal_done_repeat_emit_encodeBlockAsm12B

emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64:
	// First 32 and last 32 bytes.
	MOVOU (R8), X0
	MOVOU 16(R8), X1
	MOVOU -32(R8)(DI*1), X2
	MOVOU -16(R8)(DI*1), X3
	MOVOU X0, (BX)
	MOVOU X1, 16(BX)
	MOVOU X2, -32(BX)(DI*1)
	MOVOU X3, -16(BX)(DI*1)
	JMP emit_literal_done_repeat_emit_encodeBlockAsm12B

emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_65through128:
	// First 64 bytes in X0-X3, last 64 bytes in X12-X15.
	MOVOU (R8), X0
	MOVOU 16(R8), X1
	MOVOU 32(R8), X2
	MOVOU 48(R8), X3
	MOVOU -64(R8)(DI*1), X12
	MOVOU -48(R8)(DI*1), X13
	MOVOU -32(R8)(DI*1), X14
	MOVOU -16(R8)(DI*1), X15
	MOVOU X0, (BX)
	MOVOU X1, 16(BX)
	MOVOU X2, 32(BX)
	MOVOU X3, 48(BX)
	MOVOU X12, -64(BX)(DI*1)
	MOVOU X13, -48(BX)(DI*1)
	MOVOU X14, -32(BX)(DI*1)
	MOVOU X15, -16(BX)(DI*1)
	JMP emit_literal_done_repeat_emit_encodeBlockAsm12B

emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_129through256:
	// First 128 bytes in X0-X7, last 128 bytes in X8-X15.
	MOVOU (R8), X0
	MOVOU 16(R8), X1
	MOVOU 32(R8), X2
	MOVOU 48(R8), X3
	MOVOU 64(R8), X4
	MOVOU 80(R8), X5
	MOVOU 96(R8), X6
	MOVOU 112(R8), X7
	MOVOU -128(R8)(DI*1), X8
	MOVOU -112(R8)(DI*1), X9
	MOVOU -96(R8)(DI*1), X10
	MOVOU -80(R8)(DI*1), X11
	MOVOU -64(R8)(DI*1), X12
	MOVOU -48(R8)(DI*1), X13
	MOVOU -32(R8)(DI*1), X14
	MOVOU -16(R8)(DI*1), X15
	MOVOU X0, (BX)
	MOVOU X1, 16(BX)
	MOVOU X2, 32(BX)
	MOVOU X3, 48(BX)
	MOVOU X4, 64(BX)
	MOVOU X5, 80(BX)
	MOVOU X6, 96(BX)
	MOVOU X7, 112(BX)
	MOVOU X8, -128(BX)(DI*1)
	MOVOU X9, -112(BX)(DI*1)
	MOVOU X10, -96(BX)(DI*1)
	MOVOU X11, -80(BX)(DI*1)
	MOVOU X12, -64(BX)(DI*1)
	MOVOU X13, -48(BX)(DI*1)
	MOVOU X14, -32(BX)(DI*1)
	MOVOU X15, -16(BX)(DI*1)
	JMP emit_literal_done_repeat_emit_encodeBlockAsm12B

emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_256through2048:
	// Copies 256 bytes per iteration; DI is reduced by 256 up front.
	LEAQ -256(DI), DI
	MOVOU (R8), X0
	MOVOU 16(R8), X1
	MOVOU 32(R8), X2
	MOVOU 48(R8), X3
	MOVOU 64(R8), X4
	MOVOU 80(R8), X5
	MOVOU 96(R8), X6
	MOVOU 112(R8), X7
	MOVOU 128(R8), X8
	MOVOU 144(R8), X9
	MOVOU 160(R8), X10
	MOVOU 176(R8), X11
	MOVOU 192(R8), X12
	MOVOU 208(R8), X13
	MOVOU 224(R8), X14
	MOVOU 240(R8), X15
	MOVOU X0, (BX)
	MOVOU X1, 16(BX)
	MOVOU X2, 32(BX)
	MOVOU X3, 48(BX)
	MOVOU X4, 64(BX)
	MOVOU X5, 80(BX)
	MOVOU X6, 96(BX)
	MOVOU X7, 112(BX)
	MOVOU X8, 128(BX)
	MOVOU X9, 144(BX)
	MOVOU X10, 160(BX)
	MOVOU X11, 176(BX)
	MOVOU X12, 192(BX)
	MOVOU X13, 208(BX)
	MOVOU X14, 224(BX)
	MOVOU X15, 240(BX)
	// The two LEAQs advance the pointers without touching flags, so the JGE
	// still tests the CMPQ: loop again while at least 256 bytes remain,
	// otherwise re-dispatch the remainder by size.
	CMPQ DI, $0x00000100
	LEAQ 256(R8), R8
	LEAQ 256(BX), BX
	JGE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_256through2048
	JMP emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_tail
	// NOTE(review): this MOVQ follows an unconditional JMP and has no label of
	// its own, so nothing reaches it. The sized-copy cases above jump straight
	// to the label below without advancing BX past the bytes they wrote.
	// Confirm against the generator whether that is intended.
	MOVQ R9, BX

emit_literal_done_repeat_emit_encodeBlockAsm12B:
	// Write the destination pointer back to its frame slot.
	MOVQ BX, dst_base+0(FP)

emit_literal_skip_repeat_emit_encodeBlockAsm12B:
	ADDL $0x05, AX
	// BX = AX - 24(SP) is computed, then immediately overwritten by the load
	// from 16(SP); only 16(SP) - AX survives as the scan budget.
	MOVL AX, BX
	SUBL 24(SP), BX
	MOVL 16(SP), BX
	SUBL AX, BX
	// R8 counts matched bytes; BX counts bytes still available to compare.
	XORQ R8, R8
	CMPQ BX, $0x08
	JL matchlen_single_repeat_extend

matchlen_loopback_repeat_extend:
	// 8 bytes at a time: XOR two words, and on a difference the index of the
	// lowest set bit divided by 8 is the number of equal leading bytes.
	// NOTE(review): both operands are the same address (CX)(R8*1), so DI is
	// always zero and the JZ is always taken. Presumably the two sides were
	// meant to address different positions; confirm against the generator.
	MOVQ (CX)(R8*1), DI
	XORQ (CX)(R8*1), DI
	TESTQ DI, DI
	JZ matchlen_loop_repeat_extend
	BSFQ DI, DI
	SARQ $0x03, DI
	LEAQ (R8)(DI*1), R8
	JMP repeat_extend_forward_end_encodeBlockAsm12B

matchlen_loop_repeat_extend:
	LEAQ -8(BX), BX
	LEAQ 8(R8), R8
	CMPQ BX, $0x08
	JGE matchlen_loopback_repeat_extend

matchlen_single_repeat_extend:
	// Fewer than 8 bytes left: compare one byte at a time.
	TESTQ BX, BX
	JZ repeat_extend_forward_end_encodeBlockAsm12B

matchlen_single_loopback_repeat_extend:
	// NOTE(review): same-address compare again, so the JNE is never taken.
	MOVB (CX)(R8*1), DI
	CMPB (CX)(R8*1), DI
	JNE repeat_extend_forward_end_encodeBlockAsm12B
	LEAQ 1(R8), R8
	DECQ BX
	JNZ matchlen_single_loopback_repeat_extend

repeat_extend_forward_end_encodeBlockAsm12B:
	// Advance AX by the matched count; BX = AX - SI is the length to encode.
	// SI is then reloaded from 24(SP) and DI becomes the destination pointer.
	ADDL R8, AX
	MOVL AX, BX
	SUBL SI, BX
	MOVL 24(SP), SI
	MOVQ dst_base+0(FP), DI
	// A zero in 20(SP) selects the copy encoding instead of the repeat one.
	MOVL 20(SP), R8
	TESTL R8, R8
	JZ repeat_as_copy_encodeBlockAsm12B

emit_repeat_again_match_repeat_:
	// Choose a tag width from the length. R8 keeps the full length and BX
	// becomes length - 4.
	MOVQ BX, R8
	LEAQ -4(BX), BX
	CMPL R8, $0x08
	JLE repeat_two_match_repeat_
	CMPL R8, $0x0c
	JGE cant_repeat_two_offset_match_repeat_
	CMPL SI, $0x00000800
	JLT repeat_two_offset_match_repeat_

cant_repeat_two_offset_match_repeat_:
	CMPL BX, $0x00000104
	JLT repeat_three_match_repeat_
	CMPL BX, $0x00010100
	JLT repeat_four_match_repeat_
	CMPL BX, $0x0100ffff
	JLT repeat_five_match_repeat_
	// Too long for one tag: write a 5-byte tag with all three length bytes
	// set (fb ff ff), subtract 0x0100fffb from BX and go round again.
	LEAQ -16842747(BX), BX
	MOVW $0x001d, (DI)
	MOVW $0xfffb, 2(DI)
	MOVB $0xff, 4(DI)
	ADDQ $0x05, DI
	JMP emit_repeat_again_match_repeat_

repeat_five_match_repeat_:
	// 5 bytes: 1d 00, then the low 24 bits of BX - 65536, little-endian.
	LEAQ -65536(BX), BX
	MOVQ BX, SI
	MOVW $0x001d, (DI)
	MOVW BX, 2(DI)
	SARQ $0x10, SI
	MOVB SI, 4(DI)
	ADDQ $0x05, DI
	JMP repeat_end_emit_encodeBlockAsm12B

repeat_four_match_repeat_:
	// 4 bytes: 19 00, then the low 16 bits of BX - 256.
	LEAQ -256(BX), BX
	MOVW $0x0019, (DI)
	MOVW BX, 2(DI)
	ADDQ $0x04, DI
	JMP repeat_end_emit_encodeBlockAsm12B

repeat_three_match_repeat_:
	// 3 bytes: 15 00, then the low byte of BX - 4.
	LEAQ -4(BX), BX
	MOVW $0x0015, (DI)
	MOVB BL, 2(DI)
	ADDQ $0x03, DI
	JMP repeat_end_emit_encodeBlockAsm12B

repeat_two_match_repeat_:
	// 2 bytes: the 16-bit value (BX << 2) | 1.
	SHLL $0x02, BX
	ORL $0x01, BX
	MOVW BX, (DI)
	ADDQ $0x02, DI
	JMP repeat_end_emit_encodeBlockAsm12B

repeat_two_offset_match_repeat_:
	// 2 bytes: low byte of SI at offset 1; the first byte is
	// 1 + BX*4, OR'd with (SI >> 8) << 5.
	XORQ R8, R8
	LEAQ 1(R8)(BX*4), BX
	MOVB SI, 1(DI)
	SARL $0x08, SI
	SHLL $0x05, SI
	ORL SI, BX
	MOVB BL, (DI)
	ADDQ $0x02, DI
	JMP repeat_end_emit_encodeBlockAsm12B

repeat_as_copy_encodeBlockAsm12B:
	// Copy encoding: SI below 0x10000 takes the two-byte-offset forms.
	CMPL SI, $0x00010000
	JL two_byte_offset_repeat_as_copy_encodeBlockAsm12B
	CMPL BX, $0x40
	JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm12B
	// Length above 64: write tag 0xff followed by SI, take 64 off the length
	// and encode what is left as a repeat (or a final copy if under 4).
	MOVB $0xff, (DI)
	MOVD SI, 1(DI)
	LEAQ -64(BX), BX
	ADDQ $0x05, DI
	CMPL BX, $0x04
	JL four_bytes_remain_repeat_as_copy_encodeBlockAsm12B

emit_repeat_again_repeat_as_copy_encodeBlockAsm12B_emit_copy:
	// Same width selection as emit_repeat_again_match_repeat_ above.
	MOVQ BX, R8
	LEAQ -4(BX), BX
	CMPL R8, $0x08
	JLE repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy
	CMPL R8, $0x0c
	JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy
	CMPL SI, $0x00000800
	JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy
// encodeBlockAsm12B: remaining repeat/copy tag emitters for the repeat path,
// the candidate checks taken when no repeat was found, backward match
// extension, emission of the pending literals, and the start of the forward
// match-length scan.
// Throughout: CX is the source base and AX the current source index; DI is
// the destination write pointer wherever tags are stored.
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy:
	// BX holds length - 4 here; pick the 3-, 4- or 5-byte tag.
	CMPL BX, $0x00000104
	JLT repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy
	CMPL BX, $0x00010100
	JLT repeat_four_repeat_as_copy_encodeBlockAsm12B_emit_copy
	CMPL BX, $0x0100ffff
	JLT repeat_five_repeat_as_copy_encodeBlockAsm12B_emit_copy
	// Too long for one tag: write a 5-byte tag with all three length bytes
	// set (fb ff ff), subtract 0x0100fffb from BX and go round again.
	LEAQ -16842747(BX), BX
	MOVW $0x001d, (DI)
	MOVW $0xfffb, 2(DI)
	MOVB $0xff, 4(DI)
	ADDQ $0x05, DI
	JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm12B_emit_copy

repeat_five_repeat_as_copy_encodeBlockAsm12B_emit_copy:
	// 5 bytes: 1d 00, then the low 24 bits of BX - 65536, little-endian.
	LEAQ -65536(BX), BX
	MOVQ BX, SI
	MOVW $0x001d, (DI)
	MOVW BX, 2(DI)
	SARQ $0x10, SI
	MOVB SI, 4(DI)
	ADDQ $0x05, DI
	JMP repeat_end_emit_encodeBlockAsm12B

repeat_four_repeat_as_copy_encodeBlockAsm12B_emit_copy:
	// 4 bytes: 19 00, then the low 16 bits of BX - 256.
	LEAQ -256(BX), BX
	MOVW $0x0019, (DI)
	MOVW BX, 2(DI)
	ADDQ $0x04, DI
	JMP repeat_end_emit_encodeBlockAsm12B

repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy:
	// 3 bytes: 15 00, then the low byte of BX - 4.
	LEAQ -4(BX), BX
	MOVW $0x0015, (DI)
	MOVB BL, 2(DI)
	ADDQ $0x03, DI
	JMP repeat_end_emit_encodeBlockAsm12B

repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy:
	// 2 bytes: the 16-bit value (BX << 2) | 1.
	SHLL $0x02, BX
	ORL $0x01, BX
	MOVW BX, (DI)
	ADDQ $0x02, DI
	JMP repeat_end_emit_encodeBlockAsm12B

repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy:
	// 2 bytes: low byte of SI at offset 1; the first byte is
	// 1 + BX*4, OR'd with (SI >> 8) << 5.
	XORQ R8, R8
	LEAQ 1(R8)(BX*4), BX
	MOVB SI, 1(DI)
	SARL $0x08, SI
	SHLL $0x05, SI
	ORL SI, BX
	MOVB BL, (DI)
	ADDQ $0x02, DI
	JMP repeat_end_emit_encodeBlockAsm12B

four_bytes_remain_repeat_as_copy_encodeBlockAsm12B:
	// Nothing left to encode: done. Otherwise write a 5-byte copy whose tag
	// byte is 3 - 4 + BX*4, followed by SI. Only DL is initialised, but only
	// BL is stored, so the upper bits of DX cannot affect the byte written.
	TESTL BX, BX
	JZ repeat_end_emit_encodeBlockAsm12B
	MOVB $0x03, DL
	LEAQ -4(DX)(BX*4), BX
	MOVB BL, (DI)
	MOVD SI, 1(DI)
	ADDQ $0x05, DI
	JMP repeat_end_emit_encodeBlockAsm12B

two_byte_offset_repeat_as_copy_encodeBlockAsm12B:
	CMPL BX, $0x40
	JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B
	// Length above 64: write tag 0xee plus the 16-bit SI, take 60 off the
	// length and encode what is left as a repeat.
	MOVB $0xee, (DI)
	MOVW SI, 1(DI)
	LEAQ -60(BX), BX
	ADDQ $0x03, DI

emit_repeat_again_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
	// Choose a tag width: R8 keeps the full length, BX becomes length - 4.
	MOVQ BX, R8
	LEAQ -4(BX), BX
	CMPL R8, $0x08
	JLE repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
	CMPL R8, $0x0c
	JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
	CMPL SI, $0x00000800
	JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short

cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
	CMPL BX, $0x00000104
	JLT repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
	CMPL BX, $0x00010100
	JLT repeat_four_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
	CMPL BX, $0x0100ffff
	JLT repeat_five_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
	LEAQ -16842747(BX), BX
	MOVW $0x001d, (DI)
	MOVW $0xfffb, 2(DI)
	MOVB $0xff, 4(DI)
	ADDQ $0x05, DI
	JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm12B_emit_copy_short

repeat_five_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
	LEAQ -65536(BX), BX
	MOVQ BX, SI
	MOVW $0x001d, (DI)
	MOVW BX, 2(DI)
	SARQ $0x10, SI
	MOVB SI, 4(DI)
	ADDQ $0x05, DI
	JMP repeat_end_emit_encodeBlockAsm12B

repeat_four_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
	LEAQ -256(BX), BX
	MOVW $0x0019, (DI)
	MOVW BX, 2(DI)
	ADDQ $0x04, DI
	JMP repeat_end_emit_encodeBlockAsm12B

repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
	LEAQ -4(BX), BX
	MOVW $0x0015, (DI)
	MOVB BL, 2(DI)
	ADDQ $0x03, DI
	JMP repeat_end_emit_encodeBlockAsm12B

repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
	SHLL $0x02, BX
	ORL $0x01, BX
	MOVW BX, (DI)
	ADDQ $0x02, DI
	JMP repeat_end_emit_encodeBlockAsm12B

repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
	XORQ R8, R8
	LEAQ 1(R8)(BX*4), BX
	MOVB SI, 1(DI)
	SARL $0x08, SI
	SHLL $0x05, SI
	ORL SI, BX
	MOVB BL, (DI)
	ADDQ $0x02, DI
	JMP repeat_end_emit_encodeBlockAsm12B

two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B:
	// Length >= 12 or SI >= 0x800 needs the 3-byte copy; otherwise use the
	// 2-byte form: first byte 1 - 16 + BX*4 OR'd with (SI >> 8) << 5, then
	// the low byte of SI.
	CMPL BX, $0x0c
	JGE emit_copy_three_repeat_as_copy_encodeBlockAsm12B
	CMPL SI, $0x00000800
	JGE emit_copy_three_repeat_as_copy_encodeBlockAsm12B
	MOVB $0x01, DL
	LEAQ -16(DX)(BX*4), BX
	MOVB SI, 1(DI)
	SHRL $0x08, SI
	SHLL $0x05, SI
	ORL SI, BX
	MOVB BL, (DI)
	ADDQ $0x02, DI
	JMP repeat_end_emit_encodeBlockAsm12B

emit_copy_three_repeat_as_copy_encodeBlockAsm12B:
	// 3 bytes: tag byte 2 - 4 + BX*4, then the 16-bit SI.
	MOVB $0x02, DL
	LEAQ -4(DX)(BX*4), BX
	MOVB BL, (DI)
	MOVW SI, 1(DI)
	ADDQ $0x03, DI

repeat_end_emit_encodeBlockAsm12B:
	// Save the destination pointer; finish with the remainder once AX has
	// passed the limit in 16(SP), otherwise resume searching.
	MOVQ DI, dst_base+0(FP)
	MOVL 16(SP), BX
	CMPL AX, BX
	JGT emit_remainder_encodeBlockAsm12B
	JMP search_loop_encodeBlockAsm12B

no_repeat_found_encodeBlockAsm12B:
	// R8 = 12-bit hash of SI with its low 16 bits cleared
	// (>>16, <<16, multiply by the constant, keep the top 12 bits).
	MOVQ $0x0000cf1bbcdcbf9b, R9
	MOVQ SI, R8
	SHRQ $0x10, R8
	SHLQ $0x10, R8
	IMULQ R9, R8
	SHRQ $0x34, R8
	// NOTE(review): SHRQ with a non-zero count rewrites ZF, so the JEQ below
	// tests the shift result rather than the CMPL. Confirm the intended order.
	CMPL (CX)(BX*1), SI
	SHRQ $0x08, SI
	JEQ candidate_match_encodeBlockAsm12B
	// NOTE(review): the table at 32(SP) is indexed with scale 1 but accessed
	// with 4-byte MOVL, so neighbouring slots overlap. Confirm the scale.
	MOVL 32(SP)(R8*1), BX
	CMPL (CX)(DI*1), SI
	JEQ candidate2_match_encodeBlockAsm12B
	// Record AX + 2 in the table, then test the third candidate.
	LEAQ 2(AX), DI
	MOVL DI, 32(SP)(R8*1)
	SHRQ $0x08, SI
	CMPL (CX)(BX*1), SI
	JEQ candidate3_match_encodeBlockAsm12B
	// No candidate matched: continue from the position saved in 28(SP).
	MOVL 28(SP), AX
	JMP search_loop_encodeBlockAsm12B

candidate3_match_encodeBlockAsm12B:
	ADDL $0x02, AX
	JMP candidate_match_encodeBlockAsm12B

candidate2_match_encodeBlockAsm12B:
	// Store AX - 2 in the table slot, step AX by one and take DI as the
	// candidate index.
	LEAQ -2(AX), BX
	MOVL BX, 32(SP)(R8*1)
	INCL AX
	MOVL DI, BX

candidate_match_encodeBlockAsm12B:
	// Extend the match backwards. BX is the candidate index and SI the value
	// of 20(SP); a candidate at index 0 cannot be extended.
	MOVL 20(SP), SI
	TESTL BX, BX
	JZ match_extend_back_end_encodeBlockAsm12B

match_extend_back_loop_encodeBlockAsm12B:
	// Leave when AX is greater than SI, when the bytes just before the two
	// positions differ, or when BX reaches 0.
	CMPL AX, SI
	JG match_extend_back_end_encodeBlockAsm12B
	MOVB -1(CX)(BX*1), DL
	MOVB -1(CX)(AX*1), DI
	CMPB DL, DI
	JNE match_extend_back_end_encodeBlockAsm12B
	LEAL -1(AX), AX
	DECL BX
	JZ match_extend_back_end_encodeBlockAsm12B
	JMP match_extend_back_loop_encodeBlockAsm12B

match_extend_back_end_encodeBlockAsm12B:
	// Output-space check against the limit kept in (SP); return 0 if it fails.
	// NOTE(review): LEAQ takes the address of the dst_base frame slot plus SI,
	// not the pointer stored in that slot. Confirm this is intended.
	MOVL AX, SI
	SUBL 20(SP), SI
	LEAQ dst_base+0(FP)(SI*1), SI
	CMPQ SI, (SP)
	JL match_dst_size_check_encodeBlockAsm12B
	MOVQ $0x00000000, ret+48(FP)
	RET

match_dst_size_check_encodeBlockAsm12B:
	// Emit the pending literals: none if 20(SP) already equals BX. Otherwise
	// 20(SP) = BX, SI = source pointer, R8 = byte count, DI = destination.
	MOVL BX, SI
	MOVL 20(SP), DI
	CMPL DI, SI
	JEQ emit_literal_skip_match_emit_encodeBlockAsm12B
	MOVL SI, R8
	MOVL SI, 20(SP)
	LEAQ (CX)(DI*1), SI
	SUBL DI, R8
	MOVQ dst_base+0(FP), DI
	// R9 = count - 1 selects the literal tag width (1 to 5 bytes).
	MOVQ R8, R9
	SUBL $0x01, R9
	JC emit_literal_done_match_emit_encodeBlockAsm12B
	CMPL R9, $0x3c
	JLT one_byte_match_emit_encodeBlockAsm12B
	CMPL R9, $0x00000100
	JLT two_bytes_match_emit_encodeBlockAsm12B
	CMPL R9, $0x00010000
	JLT three_bytes_match_emit_encodeBlockAsm12B
	CMPL R9, $0x01000000
	JLT four_bytes_match_emit_encodeBlockAsm12B
	// 5 bytes: 0xfc, then R9 as 32 bits.
	MOVB $0xfc, (DI)
	MOVL R9, 1(DI)
	ADDQ $0x05, DI
	JMP memmove_match_emit_encodeBlockAsm12B

four_bytes_match_emit_encodeBlockAsm12B:
	// 4 bytes: 0xf8, then the low 24 bits of R9.
	MOVQ R9, R10
	SHRL $0x10, R10
	MOVB $0xf8, (DI)
	MOVW R9, 1(DI)
	MOVB R10, 3(DI)
	ADDQ $0x04, DI
	JMP memmove_match_emit_encodeBlockAsm12B

three_bytes_match_emit_encodeBlockAsm12B:
	// 3 bytes: 0xf4, then the low 16 bits of R9.
	MOVB $0xf4, (DI)
	MOVW R9, 1(DI)
	ADDQ $0x03, DI
	JMP memmove_match_emit_encodeBlockAsm12B

two_bytes_match_emit_encodeBlockAsm12B:
	// 2 bytes: 0xf0, then the low byte of R9.
	MOVB $0xf0, (DI)
	MOVB R9, 1(DI)
	ADDQ $0x02, DI
	JMP memmove_match_emit_encodeBlockAsm12B

one_byte_match_emit_encodeBlockAsm12B:
	// 1 byte: R9 << 2.
	SHLB $0x02, R9
	MOVB R9, (DI)
	ADDQ $0x01, DI

memmove_match_emit_encodeBlockAsm12B:
	// R9 = DI + R8, the address one past the bytes about to be copied.
	LEAQ (DI)(R8*1), R9
	NOP

emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_tail:
	// Dispatch on the byte count in R8 to a fixed-size copy.
	TESTQ R8, R8
	JEQ emit_literal_done_match_emit_encodeBlockAsm12B
	CMPQ R8, $0x02
	JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_1or2
	CMPQ R8, $0x04
	JB emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_3
	JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_4
	CMPQ R8, $0x08
	JB emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_5through7
	JE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8
	CMPQ R8, $0x10
	JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_9through16
	CMPQ R8, $0x20
	JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32
	CMPQ R8, $0x40
	JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64
	CMPQ R8, $0x80
	JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_65through128
	CMPQ R8, $0x00000100
	JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_129through256
	JMP emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_256through2048

emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_1or2:
	// Each case below copies the head and the tail of the run (which may
	// overlap), doing all loads before any store.
	MOVB (SI), R9
	MOVB -1(SI)(R8*1), SI
	MOVB R9, (DI)
	MOVB SI, -1(DI)(R8*1)
	JMP emit_literal_done_match_emit_encodeBlockAsm12B

emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_4:
	MOVL (SI), R9
	MOVL R9, (DI)
	JMP emit_literal_done_match_emit_encodeBlockAsm12B

emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_3:
	MOVW (SI), R9
	MOVB 2(SI), SI
	MOVW R9, (DI)
	MOVB SI, 2(DI)
	JMP emit_literal_done_match_emit_encodeBlockAsm12B

emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_5through7:
	MOVL (SI), R9
	MOVL -4(SI)(R8*1), SI
	MOVL R9, (DI)
	MOVL SI, -4(DI)(R8*1)
	JMP emit_literal_done_match_emit_encodeBlockAsm12B

emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8:
	MOVQ (SI), R9
	MOVQ R9, (DI)
	JMP emit_literal_done_match_emit_encodeBlockAsm12B

emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_9through16:
	MOVQ (SI), R9
	MOVQ -8(SI)(R8*1), SI
	MOVQ R9, (DI)
	MOVQ SI, -8(DI)(R8*1)
	JMP emit_literal_done_match_emit_encodeBlockAsm12B

emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32:
	MOVOU (SI), X0
	MOVOU -16(SI)(R8*1), X1
	MOVOU X0, (DI)
	MOVOU X1, -16(DI)(R8*1)
	JMP emit_literal_done_match_emit_encodeBlockAsm12B

emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64:
	MOVOU (SI), X0
	MOVOU 16(SI), X1
	MOVOU -32(SI)(R8*1), X2
	MOVOU -16(SI)(R8*1), X3
	MOVOU X0, (DI)
	MOVOU X1, 16(DI)
	MOVOU X2, -32(DI)(R8*1)
	MOVOU X3, -16(DI)(R8*1)
	JMP emit_literal_done_match_emit_encodeBlockAsm12B

emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_65through128:
	MOVOU (SI), X0
	MOVOU 16(SI), X1
	MOVOU 32(SI), X2
	MOVOU 48(SI), X3
	MOVOU -64(SI)(R8*1), X12
	MOVOU -48(SI)(R8*1), X13
	MOVOU -32(SI)(R8*1), X14
	MOVOU -16(SI)(R8*1), X15
	MOVOU X0, (DI)
	MOVOU X1, 16(DI)
	MOVOU X2, 32(DI)
	MOVOU X3, 48(DI)
	MOVOU X12, -64(DI)(R8*1)
	MOVOU X13, -48(DI)(R8*1)
	MOVOU X14, -32(DI)(R8*1)
	MOVOU X15, -16(DI)(R8*1)
	JMP emit_literal_done_match_emit_encodeBlockAsm12B

emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_129through256:
	MOVOU (SI), X0
	MOVOU 16(SI), X1
	MOVOU 32(SI), X2
	MOVOU 48(SI), X3
	MOVOU 64(SI), X4
	MOVOU 80(SI), X5
	MOVOU 96(SI), X6
	MOVOU 112(SI), X7
	MOVOU -128(SI)(R8*1), X8
	MOVOU -112(SI)(R8*1), X9
	MOVOU -96(SI)(R8*1), X10
	MOVOU -80(SI)(R8*1), X11
	MOVOU -64(SI)(R8*1), X12
	MOVOU -48(SI)(R8*1), X13
	MOVOU -32(SI)(R8*1), X14
	MOVOU -16(SI)(R8*1), X15
	MOVOU X0, (DI)
	MOVOU X1, 16(DI)
	MOVOU X2, 32(DI)
	MOVOU X3, 48(DI)
	MOVOU X4, 64(DI)
	MOVOU X5, 80(DI)
	MOVOU X6, 96(DI)
	MOVOU X7, 112(DI)
	MOVOU X8, -128(DI)(R8*1)
	MOVOU X9, -112(DI)(R8*1)
	MOVOU X10, -96(DI)(R8*1)
	MOVOU X11, -80(DI)(R8*1)
	MOVOU X12, -64(DI)(R8*1)
	MOVOU X13, -48(DI)(R8*1)
	MOVOU X14, -32(DI)(R8*1)
	MOVOU X15, -16(DI)(R8*1)
	JMP emit_literal_done_match_emit_encodeBlockAsm12B

emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_256through2048:
	// Copies 256 bytes per iteration; R8 is reduced by 256 up front.
	LEAQ -256(R8), R8
	MOVOU (SI), X0
	MOVOU 16(SI), X1
	MOVOU 32(SI), X2
	MOVOU 48(SI), X3
	MOVOU 64(SI), X4
	MOVOU 80(SI), X5
	MOVOU 96(SI), X6
	MOVOU 112(SI), X7
	MOVOU 128(SI), X8
	MOVOU 144(SI), X9
	MOVOU 160(SI), X10
	MOVOU 176(SI), X11
	MOVOU 192(SI), X12
	MOVOU 208(SI), X13
	MOVOU 224(SI), X14
	MOVOU 240(SI), X15
	MOVOU X0, (DI)
	MOVOU X1, 16(DI)
	MOVOU X2, 32(DI)
	MOVOU X3, 48(DI)
	MOVOU X4, 64(DI)
	MOVOU X5, 80(DI)
	MOVOU X6, 96(DI)
	MOVOU X7, 112(DI)
	MOVOU X8, 128(DI)
	MOVOU X9, 144(DI)
	MOVOU X10, 160(DI)
	MOVOU X11, 176(DI)
	MOVOU X12, 192(DI)
	MOVOU X13, 208(DI)
	MOVOU X14, 224(DI)
	MOVOU X15, 240(DI)
	// The two LEAQs leave flags alone, so the JGE still tests the CMPQ.
	CMPQ R8, $0x00000100
	LEAQ 256(SI), SI
	LEAQ 256(DI), DI
	JGE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_256through2048
	JMP emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_tail
	// NOTE(review): this MOVQ follows an unconditional JMP and has no label of
	// its own, so nothing reaches it. The sized-copy cases jump straight to
	// the label below without advancing DI past the bytes they wrote.
	// Confirm against the generator whether that is intended.
	MOVQ R9, DI

emit_literal_done_match_emit_encodeBlockAsm12B:
	MOVQ DI, dst_base+0(FP)

emit_literal_skip_match_emit_encodeBlockAsm12B:
	NOP

match_nolit_loop_encodeBlockAsm12B:
	// 24(SP) = AX - BX, the distance between current position and candidate.
	// The first MOVL is duplicated by the second.
	MOVL AX, SI
	MOVL AX, SI
	SUBL BX, SI
	MOVL SI, 24(SP)
	// Step both indices by 4, then scan forward with SI = 16(SP) - AX bytes
	// of budget and R8 counting matched bytes.
	ADDL $0x04, AX
	ADDL $0x04, BX
	MOVL 16(SP), SI
	SUBL AX, SI
	XORQ R8, R8
	CMPQ SI, $0x08
	JL matchlen_single_match_nolit_encodeBlockAsm12B

matchlen_loopback_match_nolit_encodeBlockAsm12B:
	// 8 bytes at a time: XOR two words, and on a difference the index of the
	// lowest set bit divided by 8 is the number of equal leading bytes.
	// NOTE(review): both operands are the same address (CX)(R8*1), so DI is
	// always zero and the JZ is always taken. Presumably the two sides were
	// meant to address different positions; confirm against the generator.
	MOVQ (CX)(R8*1), DI
	XORQ (CX)(R8*1), DI
	TESTQ DI, DI
	JZ matchlen_loop_match_nolit_encodeBlockAsm12B
	BSFQ DI, DI
	SARQ $0x03, DI
	LEAQ (R8)(DI*1), R8
	JMP match_nolit_end_encodeBlockAsm12B
// encodeBlockAsm12B: tail of the forward match-length scan after a match,
// then emission of the copy tag for the large-offset case and of the repeat
// tags that carry any length beyond the first copy.
// During the scan SI is the number of bytes still available and R8 the number
// matched so far; CX is the source base.
matchlen_loop_match_nolit_encodeBlockAsm12B:
	// 8 more bytes matched: keep going while at least 8 bytes are available.
	LEAQ -8(SI), SI
	LEAQ 8(R8), R8
	CMPQ SI, $0x08
	JGE matchlen_loopback_match_nolit_encodeBlockAsm12B

matchlen_single_match_nolit_encodeBlockAsm12B:
	// Fewer than 8 bytes left: compare one byte at a time.
	TESTQ SI, SI
	JZ match_nolit_end_encodeBlockAsm12B

matchlen_single_loopback_match_nolit_encodeBlockAsm12B:
	// NOTE(review): both operands are the same address (CX)(R8*1), so the
	// JNE is never taken and the loop always runs SI down to zero. Presumably
	// the two sides were meant to address different positions; confirm
	// against the generator.
	MOVB (CX)(R8*1), DI
	CMPB (CX)(R8*1), DI
	JNE match_nolit_end_encodeBlockAsm12B
	LEAQ 1(R8), R8
	DECQ SI
	JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm12B

match_nolit_end_encodeBlockAsm12B:
	// SI = value saved in 24(SP), R8 = matched count + 4, DI = destination
	// pointer; AX is advanced by R8.
	MOVL 24(SP), SI
	ADDQ $0x04, R8
	MOVQ dst_base+0(FP), DI
	ADDL R8, AX
	// SI below 0x10000 takes the two-byte-offset forms.
	CMPL SI, $0x00010000
	JL two_byte_offset_match_nolit_encodeBlockAsm12B
	CMPL R8, $0x40
	JLE four_bytes_remain_match_nolit_encodeBlockAsm12B
	// Length above 64: write tag 0xff followed by SI, take 64 off the length
	// and encode what is left as a repeat (or a final copy if under 4).
	MOVB $0xff, (DI)
	MOVD SI, 1(DI)
	LEAQ -64(R8), R8
	ADDQ $0x05, DI
	CMPL R8, $0x04
	JL four_bytes_remain_match_nolit_encodeBlockAsm12B

emit_repeat_again_match_nolit_encodeBlockAsm12B_emit_copy:
	// Choose a tag width: R9 keeps the full length, R8 becomes length - 4.
	MOVQ R8, R9
	LEAQ -4(R8), R8
	CMPL R9, $0x08
	JLE repeat_two_match_nolit_encodeBlockAsm12B_emit_copy
	CMPL R9, $0x0c
	JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy
	CMPL SI, $0x00000800
	JLT repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy

cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy:
	CMPL R8, $0x00000104
	JLT repeat_three_match_nolit_encodeBlockAsm12B_emit_copy
	CMPL R8, $0x00010100
	JLT repeat_four_match_nolit_encodeBlockAsm12B_emit_copy
	CMPL R8, $0x0100ffff
	JLT repeat_five_match_nolit_encodeBlockAsm12B_emit_copy
	// Too long for one tag: write a 5-byte tag with all three length bytes
	// set (fb ff ff), subtract 0x0100fffb from R8 and go round again.
	LEAQ -16842747(R8), R8
	MOVW $0x001d, (DI)
	MOVW $0xfffb, 2(DI)
	MOVB $0xff, 4(DI)
	ADDQ $0x05, DI
	JMP emit_repeat_again_match_nolit_encodeBlockAsm12B_emit_copy

repeat_five_match_nolit_encodeBlockAsm12B_emit_copy:
	// 5 bytes: 1d 00, then the low 24 bits of R8 - 65536, little-endian.
	LEAQ -65536(R8), R8
	MOVQ R8, SI
	MOVW $0x001d, (DI)
	MOVW R8, 2(DI)
	SARQ $0x10, SI
	MOVB SI, 4(DI)
	ADDQ $0x05, DI
	JMP match_nolit_emitcopy_end_encodeBlockAsm12B

repeat_four_match_nolit_encodeBlockAsm12B_emit_copy:
	// 4 bytes: 19 00, then the low 16 bits of R8 - 256.
	LEAQ -256(R8), R8
	MOVW $0x0019, (DI)
	MOVW R8, 2(DI)
	ADDQ $0x04, DI
	JMP match_nolit_emitcopy_end_encodeBlockAsm12B
repeat_three_match_nolit_encodeBlockAsm12B_emit_copy: LEAQ -4(R8), R8 MOVW $0x0015, (DI) MOVB R8, 2(DI) ADDQ $0x03, DI JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_two_match_nolit_encodeBlockAsm12B_emit_copy: SHLL $0x02, R8 ORL $0x01, R8 MOVW R8, (DI) ADDQ $0x02, DI JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy: XORQ R9, R9 LEAQ 1(R9)(R8*4), R8 MOVB SI, 1(DI) SARL $0x08, SI SHLL $0x05, SI ORL SI, R8 MOVB R8, (DI) ADDQ $0x02, DI JMP match_nolit_emitcopy_end_encodeBlockAsm12B four_bytes_remain_match_nolit_encodeBlockAsm12B: TESTL R8, R8 JZ match_nolit_emitcopy_end_encodeBlockAsm12B MOVB $0x03, DL LEAQ -4(DX)(R8*4), R8 MOVB R8, (DI) MOVD SI, 1(DI) ADDQ $0x05, DI JMP match_nolit_emitcopy_end_encodeBlockAsm12B two_byte_offset_match_nolit_encodeBlockAsm12B: CMPL R8, $0x40 JLE two_byte_offset_short_match_nolit_encodeBlockAsm12B MOVB $0xee, (DI) MOVW SI, 1(DI) LEAQ -60(R8), R8 ADDQ $0x03, DI emit_repeat_again_match_nolit_encodeBlockAsm12B_emit_copy_short: MOVQ R8, R9 LEAQ -4(R8), R8 CMPL R9, $0x08 JLE repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short CMPL R9, $0x0c JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short CMPL SI, $0x00000800 JLT repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short: CMPL R8, $0x00000104 JLT repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short CMPL R8, $0x00010100 JLT repeat_four_match_nolit_encodeBlockAsm12B_emit_copy_short CMPL R8, $0x0100ffff JLT repeat_five_match_nolit_encodeBlockAsm12B_emit_copy_short LEAQ -16842747(R8), R8 MOVW $0x001d, (DI) MOVW $0xfffb, 2(DI) MOVB $0xff, 4(DI) ADDQ $0x05, DI JMP emit_repeat_again_match_nolit_encodeBlockAsm12B_emit_copy_short repeat_five_match_nolit_encodeBlockAsm12B_emit_copy_short: LEAQ -65536(R8), R8 MOVQ R8, SI MOVW $0x001d, (DI) MOVW R8, 2(DI) SARQ $0x10, SI MOVB SI, 4(DI) ADDQ $0x05, DI JMP 
match_nolit_emitcopy_end_encodeBlockAsm12B repeat_four_match_nolit_encodeBlockAsm12B_emit_copy_short: LEAQ -256(R8), R8 MOVW $0x0019, (DI) MOVW R8, 2(DI) ADDQ $0x04, DI JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short: LEAQ -4(R8), R8 MOVW $0x0015, (DI) MOVB R8, 2(DI) ADDQ $0x03, DI JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short: SHLL $0x02, R8 ORL $0x01, R8 MOVW R8, (DI) ADDQ $0x02, DI JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short: XORQ R9, R9 LEAQ 1(R9)(R8*4), R8 MOVB SI, 1(DI) SARL $0x08, SI SHLL $0x05, SI ORL SI, R8 MOVB R8, (DI) ADDQ $0x02, DI JMP match_nolit_emitcopy_end_encodeBlockAsm12B two_byte_offset_short_match_nolit_encodeBlockAsm12B: CMPL R8, $0x0c JGE emit_copy_three_match_nolit_encodeBlockAsm12B CMPL SI, $0x00000800 JGE emit_copy_three_match_nolit_encodeBlockAsm12B MOVB $0x01, DL LEAQ -16(DX)(R8*4), R8 MOVB SI, 1(DI) SHRL $0x08, SI SHLL $0x05, SI ORL SI, R8 MOVB R8, (DI) ADDQ $0x02, DI JMP match_nolit_emitcopy_end_encodeBlockAsm12B emit_copy_three_match_nolit_encodeBlockAsm12B: MOVB $0x02, DL LEAQ -4(DX)(R8*4), R8 MOVB R8, (DI) MOVW SI, 1(DI) ADDQ $0x03, DI match_nolit_emitcopy_end_encodeBlockAsm12B: MOVQ DI, dst_base+0(FP) MOVL AX, 20(SP) CMPL AX, 16(SP) JGE emit_remainder_encodeBlockAsm12B CMPQ DI, (SP) JL match_nolit_dst_ok_encodeBlockAsm12B MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeBlockAsm12B: MOVQ -2(CX)(AX*1), SI MOVQ $0x0000cf1bbcdcbf9b, DI MOVQ SI, R8 SHRQ $0x10, SI MOVQ SI, R9 SHLQ $0x10, R8 IMULQ DI, R8 SHRQ $0x34, R8 SHLQ $0x10, R9 IMULQ DI, R9 SHRQ $0x34, R9 MOVL 32(SP)(R8*1), DI MOVL 32(SP)(R9*1), DI LEAQ -2(AX), DI MOVL DI, 32(SP)(R8*1) MOVL AX, 32(SP)(R9*1) CMPL (CX)(R9*1), SI JEQ match_nolit_loop_encodeBlockAsm12B INCL AX JMP search_loop_encodeBlockAsm12B emit_remainder_encodeBlockAsm12B: MOVQ src_len+32(FP), AX SUBL 20(SP), AX MOVQ 
dst_base+0(FP), DX LEAQ (DX)(AX*1), DX CMPQ DX, (SP) JL emit_remainder_ok_encodeBlockAsm12B MOVQ $0x00000000, ret+48(FP) RET emit_remainder_ok_encodeBlockAsm12B: MOVQ src_len+32(FP), AX MOVL 20(SP), DX CMPL DX, AX JEQ emit_literal_skip_emit_remainder_encodeBlockAsm12B MOVL AX, BX MOVL AX, 20(SP) LEAQ (CX)(DX*1), AX SUBL DX, BX MOVQ dst_base+0(FP), CX MOVQ BX, DX SUBL $0x01, DX JC emit_literal_done_emit_remainder_encodeBlockAsm12B CMPL DX, $0x3c JLT one_byte_emit_remainder_encodeBlockAsm12B CMPL DX, $0x00000100 JLT two_bytes_emit_remainder_encodeBlockAsm12B CMPL DX, $0x00010000 JLT three_bytes_emit_remainder_encodeBlockAsm12B CMPL DX, $0x01000000 JLT four_bytes_emit_remainder_encodeBlockAsm12B MOVB $0xfc, (CX) MOVL DX, 1(CX) ADDQ $0x05, CX JMP memmove_emit_remainder_encodeBlockAsm12B four_bytes_emit_remainder_encodeBlockAsm12B: MOVQ DX, SI SHRL $0x10, SI MOVB $0xf8, (CX) MOVW DX, 1(CX) MOVB SI, 3(CX) ADDQ $0x04, CX JMP memmove_emit_remainder_encodeBlockAsm12B three_bytes_emit_remainder_encodeBlockAsm12B: MOVB $0xf4, (CX) MOVW DX, 1(CX) ADDQ $0x03, CX JMP memmove_emit_remainder_encodeBlockAsm12B two_bytes_emit_remainder_encodeBlockAsm12B: MOVB $0xf0, (CX) MOVB DL, 1(CX) ADDQ $0x02, CX JMP memmove_emit_remainder_encodeBlockAsm12B one_byte_emit_remainder_encodeBlockAsm12B: SHLB $0x02, DL MOVB DL, (CX) ADDQ $0x01, CX memmove_emit_remainder_encodeBlockAsm12B: LEAQ (CX)(BX*1), DX NOP emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_tail: TESTQ BX, BX JEQ emit_literal_done_emit_remainder_encodeBlockAsm12B CMPQ BX, $0x02 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_1or2 CMPQ BX, $0x04 JB emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_3 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_4 CMPQ BX, $0x08 JB emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_5through7 JE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8 CMPQ BX, $0x10 JBE 
emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_9through16 CMPQ BX, $0x20 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_17through32 CMPQ BX, $0x40 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64 CMPQ BX, $0x80 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_65through128 CMPQ BX, $0x00000100 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_129through256 JMP emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_256through2048 emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_1or2: MOVB (AX), DL MOVB -1(AX)(BX*1), AL MOVB DL, (CX) MOVB AL, -1(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_4: MOVL (AX), DX MOVL DX, (CX) JMP emit_literal_done_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_3: MOVW (AX), DX MOVB 2(AX), AL MOVW DX, (CX) MOVB AL, 2(CX) JMP emit_literal_done_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_5through7: MOVL (AX), DX MOVL -4(AX)(BX*1), AX MOVL DX, (CX) MOVL AX, -4(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8: MOVQ (AX), DX MOVQ DX, (CX) JMP emit_literal_done_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_9through16: MOVQ (AX), DX MOVQ -8(AX)(BX*1), AX MOVQ DX, (CX) MOVQ AX, -8(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_17through32: MOVOU (AX), X0 MOVOU -16(AX)(BX*1), X1 MOVOU X0, (CX) MOVOU X1, -16(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64: MOVOU (AX), X0 MOVOU 16(AX), X1 MOVOU -32(AX)(BX*1), X2 MOVOU -16(AX)(BX*1), X3 MOVOU X0, (CX) 
MOVOU X1, 16(CX) MOVOU X2, -32(CX)(BX*1) MOVOU X3, -16(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_65through128: MOVOU (AX), X0 MOVOU 16(AX), X1 MOVOU 32(AX), X2 MOVOU 48(AX), X3 MOVOU -64(AX)(BX*1), X12 MOVOU -48(AX)(BX*1), X13 MOVOU -32(AX)(BX*1), X14 MOVOU -16(AX)(BX*1), X15 MOVOU X0, (CX) MOVOU X1, 16(CX) MOVOU X2, 32(CX) MOVOU X3, 48(CX) MOVOU X12, -64(CX)(BX*1) MOVOU X13, -48(CX)(BX*1) MOVOU X14, -32(CX)(BX*1) MOVOU X15, -16(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_129through256: MOVOU (AX), X0 MOVOU 16(AX), X1 MOVOU 32(AX), X2 MOVOU 48(AX), X3 MOVOU 64(AX), X4 MOVOU 80(AX), X5 MOVOU 96(AX), X6 MOVOU 112(AX), X7 MOVOU -128(AX)(BX*1), X8 MOVOU -112(AX)(BX*1), X9 MOVOU -96(AX)(BX*1), X10 MOVOU -80(AX)(BX*1), X11 MOVOU -64(AX)(BX*1), X12 MOVOU -48(AX)(BX*1), X13 MOVOU -32(AX)(BX*1), X14 MOVOU -16(AX)(BX*1), X15 MOVOU X0, (CX) MOVOU X1, 16(CX) MOVOU X2, 32(CX) MOVOU X3, 48(CX) MOVOU X4, 64(CX) MOVOU X5, 80(CX) MOVOU X6, 96(CX) MOVOU X7, 112(CX) MOVOU X8, -128(CX)(BX*1) MOVOU X9, -112(CX)(BX*1) MOVOU X10, -96(CX)(BX*1) MOVOU X11, -80(CX)(BX*1) MOVOU X12, -64(CX)(BX*1) MOVOU X13, -48(CX)(BX*1) MOVOU X14, -32(CX)(BX*1) MOVOU X15, -16(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_256through2048: LEAQ -256(BX), BX MOVOU (AX), X0 MOVOU 16(AX), X1 MOVOU 32(AX), X2 MOVOU 48(AX), X3 MOVOU 64(AX), X4 MOVOU 80(AX), X5 MOVOU 96(AX), X6 MOVOU 112(AX), X7 MOVOU 128(AX), X8 MOVOU 144(AX), X9 MOVOU 160(AX), X10 MOVOU 176(AX), X11 MOVOU 192(AX), X12 MOVOU 208(AX), X13 MOVOU 224(AX), X14 MOVOU 240(AX), X15 MOVOU X0, (CX) MOVOU X1, 16(CX) MOVOU X2, 32(CX) MOVOU X3, 48(CX) MOVOU X4, 64(CX) MOVOU X5, 80(CX) MOVOU X6, 96(CX) MOVOU X7, 112(CX) MOVOU X8, 128(CX) MOVOU X9, 144(CX) MOVOU X10, 160(CX) MOVOU X11, 176(CX) MOVOU X12, 192(CX) 
MOVOU X13, 208(CX) MOVOU X14, 224(CX) MOVOU X15, 240(CX) CMPQ BX, $0x00000100 LEAQ 256(AX), AX LEAQ 256(CX), CX JGE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_256through2048 JMP emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_tail MOVQ DX, CX emit_literal_done_emit_remainder_encodeBlockAsm12B: MOVQ CX, dst_base+0(FP) emit_literal_skip_emit_remainder_encodeBlockAsm12B: MOVQ 8(SP), AX SUBQ dst_base+0(FP), AX MOVQ AX, ret+48(FP) RET // func encodeBlockAsmAvx(dst []byte, src []byte) int // Requires: AVX, SSE2 TEXT ·encodeBlockAsmAvx(SB), $65568-56 MOVQ $0x00000200, AX LEAQ 32(SP), CX PXOR X0, X0 zero_loop_encodeBlockAsmAvx: MOVOU X0, (CX) MOVOU X0, 16(CX) MOVOU X0, 32(CX) MOVOU X0, 48(CX) MOVOU X0, 64(CX) MOVOU X0, 80(CX) MOVOU X0, 96(CX) MOVOU X0, 112(CX) ADDQ $0x80, CX DECQ AX JNZ zero_loop_encodeBlockAsmAvx MOVL AX, 20(SP) MOVQ src_len+32(FP), AX LEAQ -5(AX), CX LEAQ -8(AX), BX SHRQ $0x05, AX SUBL AX, CX MOVL BX, 16(SP) MOVQ dst_base+0(FP), AX MOVQ AX, 8(SP) LEAQ (AX)(CX*1), CX MOVQ CX, (SP) MOVL $0x00000001, AX MOVL AX, 24(SP) MOVQ src_base+24(FP), CX search_loop_encodeBlockAsmAvx: MOVQ (CX)(AX*1), SI MOVL AX, BX SUBL 20(SP), BX SHRL $0x06, BX LEAQ 4(AX)(BX*1), BX MOVL 16(SP), DI CMPL BX, DI JGT emit_remainder_encodeBlockAsmAvx MOVL BX, 28(SP) MOVQ $0x0000cf1bbcdcbf9b, BX MOVQ SI, R8 MOVQ SI, R9 SHRQ $0x08, R9 SHLQ $0x10, R8 IMULQ BX, R8 SHRQ $0x30, R8 SHLQ $0x10, R9 IMULQ BX, R9 SHRQ $0x30, R9 MOVL 32(SP)(R8*1), BX MOVL 32(SP)(R9*1), DI MOVL AX, 32(SP)(R8*1) LEAL 1(AX), R8 MOVL R8, 32(SP)(R9*1) MOVL AX, R8 SUBL 24(SP), R8 MOVL 1(CX)(R8*1), R10 MOVQ SI, R9 SHLQ $0x08, R9 CMPL R9, R10 JNE no_repeat_found_encodeBlockAsmAvx LEAQ 1(AX), SI MOVL 20(SP), BX TESTL R8, R8 JZ repeat_extend_back_end_encodeBlockAsmAvx repeat_extend_back_loop_encodeBlockAsmAvx: CMPL SI, BX JG repeat_extend_back_end_encodeBlockAsmAvx MOVB -1(CX)(R8*1), DL MOVB -1(CX)(SI*1), DI CMPB DL, DI JNE repeat_extend_back_end_encodeBlockAsmAvx LEAQ -1(SI), SI DECL R8 JZ 
repeat_extend_back_end_encodeBlockAsmAvx JMP repeat_extend_back_loop_encodeBlockAsmAvx repeat_extend_back_end_encodeBlockAsmAvx: MOVL 20(SP), BX CMPL BX, SI JEQ emit_literal_skip_repeat_emit_encodeBlockAsmAvx MOVL SI, DI MOVL SI, 20(SP) LEAQ (CX)(BX*1), R8 SUBL BX, DI MOVQ dst_base+0(FP), BX MOVQ DI, R9 SUBL $0x01, R9 JC emit_literal_done_repeat_emit_encodeBlockAsmAvx CMPL R9, $0x3c JLT one_byte_repeat_emit_encodeBlockAsmAvx CMPL R9, $0x00000100 JLT two_bytes_repeat_emit_encodeBlockAsmAvx CMPL R9, $0x00010000 JLT three_bytes_repeat_emit_encodeBlockAsmAvx CMPL R9, $0x01000000 JLT four_bytes_repeat_emit_encodeBlockAsmAvx MOVB $0xfc, (BX) MOVL R9, 1(BX) ADDQ $0x05, BX JMP memmove_repeat_emit_encodeBlockAsmAvx four_bytes_repeat_emit_encodeBlockAsmAvx: MOVQ R9, R10 SHRL $0x10, R10 MOVB $0xf8, (BX) MOVW R9, 1(BX) MOVB R10, 3(BX) ADDQ $0x04, BX JMP memmove_repeat_emit_encodeBlockAsmAvx three_bytes_repeat_emit_encodeBlockAsmAvx: MOVB $0xf4, (BX) MOVW R9, 1(BX) ADDQ $0x03, BX JMP memmove_repeat_emit_encodeBlockAsmAvx two_bytes_repeat_emit_encodeBlockAsmAvx: MOVB $0xf0, (BX) MOVB R9, 1(BX) ADDQ $0x02, BX JMP memmove_repeat_emit_encodeBlockAsmAvx one_byte_repeat_emit_encodeBlockAsmAvx: SHLB $0x02, R9 MOVB R9, (BX) ADDQ $0x01, BX memmove_repeat_emit_encodeBlockAsmAvx: LEAQ (BX)(DI*1), R9 NOP emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_tail: TESTQ DI, DI JEQ emit_literal_done_repeat_emit_encodeBlockAsmAvx CMPQ DI, $0x02 JBE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_1or2 CMPQ DI, $0x04 JB emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_3 JBE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_4 CMPQ DI, $0x08 JB emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_5through7 JE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_8 CMPQ DI, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_9through16 CMPQ DI, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_17through32 CMPQ DI, 
$0x40 JBE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_33through64 CMPQ DI, $0x80 JBE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_65through128 CMPQ DI, $0x00000100 JBE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_129through256 JMP emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_avxUnaligned emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_1or2: MOVB (R8), R9 MOVB -1(R8)(DI*1), R10 MOVB R9, (BX) MOVB R10, -1(BX)(DI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_4: MOVL (R8), R9 MOVL R9, (BX) JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_3: MOVW (R8), R9 MOVB 2(R8), R10 MOVW R9, (BX) MOVB R10, 2(BX) JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_5through7: MOVL (R8), R9 MOVL -4(R8)(DI*1), R10 MOVL R9, (BX) MOVL R10, -4(BX)(DI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_8: MOVQ (R8), R9 MOVQ R9, (BX) JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_9through16: MOVQ (R8), R9 MOVQ -8(R8)(DI*1), R10 MOVQ R9, (BX) MOVQ R10, -8(BX)(DI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_17through32: MOVOU (R8), X0 MOVOU -16(R8)(DI*1), X1 MOVOU X0, (BX) MOVOU X1, -16(BX)(DI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_33through64: MOVOU (R8), X0 MOVOU 16(R8), X1 MOVOU -32(R8)(DI*1), X2 MOVOU -16(R8)(DI*1), X3 MOVOU X0, (BX) MOVOU X1, 16(BX) MOVOU X2, -32(BX)(DI*1) MOVOU X3, -16(BX)(DI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_65through128: MOVOU (R8), X0 MOVOU 16(R8), X1 MOVOU 
32(R8), X2 MOVOU 48(R8), X3 MOVOU -64(R8)(DI*1), X12 MOVOU -48(R8)(DI*1), X13 MOVOU -32(R8)(DI*1), X14 MOVOU -16(R8)(DI*1), X15 MOVOU X0, (BX) MOVOU X1, 16(BX) MOVOU X2, 32(BX) MOVOU X3, 48(BX) MOVOU X12, -64(BX)(DI*1) MOVOU X13, -48(BX)(DI*1) MOVOU X14, -32(BX)(DI*1) MOVOU X15, -16(BX)(DI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_129through256: MOVOU (R8), X0 MOVOU 16(R8), X1 MOVOU 32(R8), X2 MOVOU 48(R8), X3 MOVOU 64(R8), X4 MOVOU 80(R8), X5 MOVOU 96(R8), X6 MOVOU 112(R8), X7 MOVOU -128(R8)(DI*1), X8 MOVOU -112(R8)(DI*1), X9 MOVOU -96(R8)(DI*1), X10 MOVOU -80(R8)(DI*1), X11 MOVOU -64(R8)(DI*1), X12 MOVOU -48(R8)(DI*1), X13 MOVOU -32(R8)(DI*1), X14 MOVOU -16(R8)(DI*1), X15 MOVOU X0, (BX) MOVOU X1, 16(BX) MOVOU X2, 32(BX) MOVOU X3, 48(BX) MOVOU X4, 64(BX) MOVOU X5, 80(BX) MOVOU X6, 96(BX) MOVOU X7, 112(BX) MOVOU X8, -128(BX)(DI*1) MOVOU X9, -112(BX)(DI*1) MOVOU X10, -96(BX)(DI*1) MOVOU X11, -80(BX)(DI*1) MOVOU X12, -64(BX)(DI*1) MOVOU X13, -48(BX)(DI*1) MOVOU X14, -32(BX)(DI*1) MOVOU X15, -16(BX)(DI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_256through2048: LEAQ -256(DI), DI MOVOU (R8), X0 MOVOU 16(R8), X1 MOVOU 32(R8), X2 MOVOU 48(R8), X3 MOVOU 64(R8), X4 MOVOU 80(R8), X5 MOVOU 96(R8), X6 MOVOU 112(R8), X7 MOVOU 128(R8), X8 MOVOU 144(R8), X9 MOVOU 160(R8), X10 MOVOU 176(R8), X11 MOVOU 192(R8), X12 MOVOU 208(R8), X13 MOVOU 224(R8), X14 MOVOU 240(R8), X15 MOVOU X0, (BX) MOVOU X1, 16(BX) MOVOU X2, 32(BX) MOVOU X3, 48(BX) MOVOU X4, 64(BX) MOVOU X5, 80(BX) MOVOU X6, 96(BX) MOVOU X7, 112(BX) MOVOU X8, 128(BX) MOVOU X9, 144(BX) MOVOU X10, 160(BX) MOVOU X11, 176(BX) MOVOU X12, 192(BX) MOVOU X13, 208(BX) MOVOU X14, 224(BX) MOVOU X15, 240(BX) CMPQ DI, $0x00000100 LEAQ 256(R8), R8 LEAQ 256(BX), BX JGE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_256through2048 JMP 
emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_tail emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_avxUnaligned: LEAQ (R8)(DI*1), R10 MOVQ BX, R12 MOVOU -128(R10), X5 MOVOU -112(R10), X6 MOVQ $0x00000080, R9 ANDQ $0xffffffe0, BX ADDQ $0x20, BX MOVOU -96(R10), X7 MOVOU -80(R10), X8 MOVQ BX, R11 SUBQ R12, R11 MOVOU -64(R10), X9 MOVOU -48(R10), X10 SUBQ R11, DI MOVOU -32(R10), X11 MOVOU -16(R10), X12 VMOVDQU (R8), Y4 ADDQ R11, R8 SUBQ R9, DI emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_gobble_128_loop: VMOVDQU (R8), Y0 VMOVDQU 32(R8), Y1 VMOVDQU 64(R8), Y2 VMOVDQU 96(R8), Y3 ADDQ R9, R8 VMOVDQA Y0, (BX) VMOVDQA Y1, 32(BX) VMOVDQA Y2, 64(BX) VMOVDQA Y3, 96(BX) ADDQ R9, BX SUBQ R9, DI JA emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_gobble_128_loop ADDQ R9, DI ADDQ BX, DI VMOVDQU Y4, (R12) VZEROUPPER MOVOU X5, -128(DI) MOVOU X6, -112(DI) MOVOU X7, -96(DI) MOVOU X8, -80(DI) MOVOU X9, -64(DI) MOVOU X10, -48(DI) MOVOU X11, -32(DI) MOVOU X12, -16(DI) JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx MOVQ R9, BX emit_literal_done_repeat_emit_encodeBlockAsmAvx: MOVQ BX, dst_base+0(FP) emit_literal_skip_repeat_emit_encodeBlockAsmAvx: ADDL $0x05, AX MOVL AX, BX SUBL 24(SP), BX MOVL 16(SP), BX SUBL AX, BX XORQ R8, R8 CMPQ BX, $0x08 JL matchlen_single_repeat_extend matchlen_loopback_repeat_extend: MOVQ (CX)(R8*1), DI XORQ (CX)(R8*1), DI TESTQ DI, DI JZ matchlen_loop_repeat_extend BSFQ DI, DI SARQ $0x03, DI LEAQ (R8)(DI*1), R8 JMP repeat_extend_forward_end_encodeBlockAsmAvx matchlen_loop_repeat_extend: LEAQ -8(BX), BX LEAQ 8(R8), R8 CMPQ BX, $0x08 JGE matchlen_loopback_repeat_extend matchlen_single_repeat_extend: TESTQ BX, BX JZ repeat_extend_forward_end_encodeBlockAsmAvx matchlen_single_loopback_repeat_extend: MOVB (CX)(R8*1), DI CMPB (CX)(R8*1), DI JNE repeat_extend_forward_end_encodeBlockAsmAvx LEAQ 1(R8), R8 DECQ BX JNZ matchlen_single_loopback_repeat_extend repeat_extend_forward_end_encodeBlockAsmAvx: ADDL R8, AX MOVL AX, BX SUBL SI, BX 
MOVL 24(SP), SI MOVQ dst_base+0(FP), DI MOVL 20(SP), R8 TESTL R8, R8 JZ repeat_as_copy_encodeBlockAsmAvx emit_repeat_again_match_repeat_: MOVQ BX, R8 LEAQ -4(BX), BX CMPL R8, $0x08 JLE repeat_two_match_repeat_ CMPL R8, $0x0c JGE cant_repeat_two_offset_match_repeat_ CMPL SI, $0x00000800 JLT repeat_two_offset_match_repeat_ cant_repeat_two_offset_match_repeat_: CMPL BX, $0x00000104 JLT repeat_three_match_repeat_ CMPL BX, $0x00010100 JLT repeat_four_match_repeat_ CMPL BX, $0x0100ffff JLT repeat_five_match_repeat_ LEAQ -16842747(BX), BX MOVW $0x001d, (DI) MOVW $0xfffb, 2(DI) MOVB $0xff, 4(DI) ADDQ $0x05, DI JMP emit_repeat_again_match_repeat_ repeat_five_match_repeat_: LEAQ -65536(BX), BX MOVQ BX, SI MOVW $0x001d, (DI) MOVW BX, 2(DI) SARQ $0x10, SI MOVB SI, 4(DI) ADDQ $0x05, DI JMP repeat_end_emit_encodeBlockAsmAvx repeat_four_match_repeat_: LEAQ -256(BX), BX MOVW $0x0019, (DI) MOVW BX, 2(DI) ADDQ $0x04, DI JMP repeat_end_emit_encodeBlockAsmAvx repeat_three_match_repeat_: LEAQ -4(BX), BX MOVW $0x0015, (DI) MOVB BL, 2(DI) ADDQ $0x03, DI JMP repeat_end_emit_encodeBlockAsmAvx repeat_two_match_repeat_: SHLL $0x02, BX ORL $0x01, BX MOVW BX, (DI) ADDQ $0x02, DI JMP repeat_end_emit_encodeBlockAsmAvx repeat_two_offset_match_repeat_: XORQ R8, R8 LEAQ 1(R8)(BX*4), BX MOVB SI, 1(DI) SARL $0x08, SI SHLL $0x05, SI ORL SI, BX MOVB BL, (DI) ADDQ $0x02, DI JMP repeat_end_emit_encodeBlockAsmAvx repeat_as_copy_encodeBlockAsmAvx: CMPL SI, $0x00010000 JL two_byte_offset_repeat_as_copy_encodeBlockAsmAvx CMPL BX, $0x40 JLE four_bytes_remain_repeat_as_copy_encodeBlockAsmAvx MOVB $0xff, (DI) MOVD SI, 1(DI) LEAQ -64(BX), BX ADDQ $0x05, DI CMPL BX, $0x04 JL four_bytes_remain_repeat_as_copy_encodeBlockAsmAvx emit_repeat_again_repeat_as_copy_encodeBlockAsmAvx_emit_copy: MOVQ BX, R8 LEAQ -4(BX), BX CMPL R8, $0x08 JLE repeat_two_repeat_as_copy_encodeBlockAsmAvx_emit_copy CMPL R8, $0x0c JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy CMPL SI, $0x00000800 JLT 
repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy cant_repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy: CMPL BX, $0x00000104 JLT repeat_three_repeat_as_copy_encodeBlockAsmAvx_emit_copy CMPL BX, $0x00010100 JLT repeat_four_repeat_as_copy_encodeBlockAsmAvx_emit_copy CMPL BX, $0x0100ffff JLT repeat_five_repeat_as_copy_encodeBlockAsmAvx_emit_copy LEAQ -16842747(BX), BX MOVW $0x001d, (DI) MOVW $0xfffb, 2(DI) MOVB $0xff, 4(DI) ADDQ $0x05, DI JMP emit_repeat_again_repeat_as_copy_encodeBlockAsmAvx_emit_copy repeat_five_repeat_as_copy_encodeBlockAsmAvx_emit_copy: LEAQ -65536(BX), BX MOVQ BX, SI MOVW $0x001d, (DI) MOVW BX, 2(DI) SARQ $0x10, SI MOVB SI, 4(DI) ADDQ $0x05, DI JMP repeat_end_emit_encodeBlockAsmAvx repeat_four_repeat_as_copy_encodeBlockAsmAvx_emit_copy: LEAQ -256(BX), BX MOVW $0x0019, (DI) MOVW BX, 2(DI) ADDQ $0x04, DI JMP repeat_end_emit_encodeBlockAsmAvx repeat_three_repeat_as_copy_encodeBlockAsmAvx_emit_copy: LEAQ -4(BX), BX MOVW $0x0015, (DI) MOVB BL, 2(DI) ADDQ $0x03, DI JMP repeat_end_emit_encodeBlockAsmAvx repeat_two_repeat_as_copy_encodeBlockAsmAvx_emit_copy: SHLL $0x02, BX ORL $0x01, BX MOVW BX, (DI) ADDQ $0x02, DI JMP repeat_end_emit_encodeBlockAsmAvx repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy: XORQ R8, R8 LEAQ 1(R8)(BX*4), BX MOVB SI, 1(DI) SARL $0x08, SI SHLL $0x05, SI ORL SI, BX MOVB BL, (DI) ADDQ $0x02, DI JMP repeat_end_emit_encodeBlockAsmAvx four_bytes_remain_repeat_as_copy_encodeBlockAsmAvx: TESTL BX, BX JZ repeat_end_emit_encodeBlockAsmAvx MOVB $0x03, DL LEAQ -4(DX)(BX*4), BX MOVB BL, (DI) MOVD SI, 1(DI) ADDQ $0x05, DI JMP repeat_end_emit_encodeBlockAsmAvx two_byte_offset_repeat_as_copy_encodeBlockAsmAvx: CMPL BX, $0x40 JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsmAvx MOVB $0xee, (DI) MOVW SI, 1(DI) LEAQ -60(BX), BX ADDQ $0x03, DI emit_repeat_again_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short: MOVQ BX, R8 LEAQ -4(BX), BX CMPL R8, $0x08 JLE 
repeat_two_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short CMPL R8, $0x0c JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short CMPL SI, $0x00000800 JLT repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short: CMPL BX, $0x00000104 JLT repeat_three_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short CMPL BX, $0x00010100 JLT repeat_four_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short CMPL BX, $0x0100ffff JLT repeat_five_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short LEAQ -16842747(BX), BX MOVW $0x001d, (DI) MOVW $0xfffb, 2(DI) MOVB $0xff, 4(DI) ADDQ $0x05, DI JMP emit_repeat_again_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short repeat_five_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short: LEAQ -65536(BX), BX MOVQ BX, SI MOVW $0x001d, (DI) MOVW BX, 2(DI) SARQ $0x10, SI MOVB SI, 4(DI) ADDQ $0x05, DI JMP repeat_end_emit_encodeBlockAsmAvx repeat_four_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short: LEAQ -256(BX), BX MOVW $0x0019, (DI) MOVW BX, 2(DI) ADDQ $0x04, DI JMP repeat_end_emit_encodeBlockAsmAvx repeat_three_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short: LEAQ -4(BX), BX MOVW $0x0015, (DI) MOVB BL, 2(DI) ADDQ $0x03, DI JMP repeat_end_emit_encodeBlockAsmAvx repeat_two_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short: SHLL $0x02, BX ORL $0x01, BX MOVW BX, (DI) ADDQ $0x02, DI JMP repeat_end_emit_encodeBlockAsmAvx repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short: XORQ R8, R8 LEAQ 1(R8)(BX*4), BX MOVB SI, 1(DI) SARL $0x08, SI SHLL $0x05, SI ORL SI, BX MOVB BL, (DI) ADDQ $0x02, DI JMP repeat_end_emit_encodeBlockAsmAvx two_byte_offset_short_repeat_as_copy_encodeBlockAsmAvx: CMPL BX, $0x0c JGE emit_copy_three_repeat_as_copy_encodeBlockAsmAvx CMPL SI, $0x00000800 JGE emit_copy_three_repeat_as_copy_encodeBlockAsmAvx MOVB $0x01, DL LEAQ -16(DX)(BX*4), BX MOVB SI, 1(DI) SHRL $0x08, SI SHLL $0x05, SI ORL SI, BX MOVB BL, (DI) ADDQ $0x02, DI JMP 
repeat_end_emit_encodeBlockAsmAvx emit_copy_three_repeat_as_copy_encodeBlockAsmAvx: MOVB $0x02, DL LEAQ -4(DX)(BX*4), BX MOVB BL, (DI) MOVW SI, 1(DI) ADDQ $0x03, DI repeat_end_emit_encodeBlockAsmAvx: MOVQ DI, dst_base+0(FP) MOVL 16(SP), BX CMPL AX, BX JGT emit_remainder_encodeBlockAsmAvx JMP search_loop_encodeBlockAsmAvx no_repeat_found_encodeBlockAsmAvx: MOVQ $0x0000cf1bbcdcbf9b, R9 MOVQ SI, R8 SHRQ $0x10, R8 SHLQ $0x10, R8 IMULQ R9, R8 SHRQ $0x30, R8 CMPL (CX)(BX*1), SI SHRQ $0x08, SI JEQ candidate_match_encodeBlockAsmAvx MOVL 32(SP)(R8*1), BX CMPL (CX)(DI*1), SI JEQ candidate2_match_encodeBlockAsmAvx LEAQ 2(AX), DI MOVL DI, 32(SP)(R8*1) SHRQ $0x08, SI CMPL (CX)(BX*1), SI JEQ candidate3_match_encodeBlockAsmAvx MOVL 28(SP), AX JMP search_loop_encodeBlockAsmAvx candidate3_match_encodeBlockAsmAvx: ADDL $0x02, AX JMP candidate_match_encodeBlockAsmAvx candidate2_match_encodeBlockAsmAvx: LEAQ -2(AX), BX MOVL BX, 32(SP)(R8*1) INCL AX MOVL DI, BX candidate_match_encodeBlockAsmAvx: MOVL 20(SP), SI TESTL BX, BX JZ match_extend_back_end_encodeBlockAsmAvx match_extend_back_loop_encodeBlockAsmAvx: CMPL AX, SI JG match_extend_back_end_encodeBlockAsmAvx MOVB -1(CX)(BX*1), DL MOVB -1(CX)(AX*1), DI CMPB DL, DI JNE match_extend_back_end_encodeBlockAsmAvx LEAL -1(AX), AX DECL BX JZ match_extend_back_end_encodeBlockAsmAvx JMP match_extend_back_loop_encodeBlockAsmAvx match_extend_back_end_encodeBlockAsmAvx: MOVL AX, SI SUBL 20(SP), SI LEAQ dst_base+0(FP)(SI*1), SI CMPQ SI, (SP) JL match_dst_size_check_encodeBlockAsmAvx MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeBlockAsmAvx: MOVL BX, SI MOVL 20(SP), DI CMPL DI, SI JEQ emit_literal_skip_match_emit_encodeBlockAsmAvx MOVL SI, R8 MOVL SI, 20(SP) LEAQ (CX)(DI*1), SI SUBL DI, R8 MOVQ dst_base+0(FP), DI MOVQ R8, R9 SUBL $0x01, R9 JC emit_literal_done_match_emit_encodeBlockAsmAvx CMPL R9, $0x3c JLT one_byte_match_emit_encodeBlockAsmAvx CMPL R9, $0x00000100 JLT two_bytes_match_emit_encodeBlockAsmAvx CMPL R9, $0x00010000 JLT 
three_bytes_match_emit_encodeBlockAsmAvx CMPL R9, $0x01000000 JLT four_bytes_match_emit_encodeBlockAsmAvx MOVB $0xfc, (DI) MOVL R9, 1(DI) ADDQ $0x05, DI JMP memmove_match_emit_encodeBlockAsmAvx four_bytes_match_emit_encodeBlockAsmAvx: MOVQ R9, R10 SHRL $0x10, R10 MOVB $0xf8, (DI) MOVW R9, 1(DI) MOVB R10, 3(DI) ADDQ $0x04, DI JMP memmove_match_emit_encodeBlockAsmAvx three_bytes_match_emit_encodeBlockAsmAvx: MOVB $0xf4, (DI) MOVW R9, 1(DI) ADDQ $0x03, DI JMP memmove_match_emit_encodeBlockAsmAvx two_bytes_match_emit_encodeBlockAsmAvx: MOVB $0xf0, (DI) MOVB R9, 1(DI) ADDQ $0x02, DI JMP memmove_match_emit_encodeBlockAsmAvx one_byte_match_emit_encodeBlockAsmAvx: SHLB $0x02, R9 MOVB R9, (DI) ADDQ $0x01, DI memmove_match_emit_encodeBlockAsmAvx: LEAQ (DI)(R8*1), R9 NOP emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_tail: TESTQ R8, R8 JEQ emit_literal_done_match_emit_encodeBlockAsmAvx CMPQ R8, $0x02 JBE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_1or2 CMPQ R8, $0x04 JB emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_3 JBE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_4 CMPQ R8, $0x08 JB emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_5through7 JE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_8 CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_9through16 CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_17through32 CMPQ R8, $0x40 JBE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_33through64 CMPQ R8, $0x80 JBE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_65through128 CMPQ R8, $0x00000100 JBE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_129through256 JMP emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_avxUnaligned emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_1or2: MOVB (SI), R9 MOVB -1(SI)(R8*1), R10 MOVB R9, (DI) MOVB R10, -1(DI)(R8*1) JMP emit_literal_done_match_emit_encodeBlockAsmAvx 
emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_4: MOVL (SI), R9 MOVL R9, (DI) JMP emit_literal_done_match_emit_encodeBlockAsmAvx emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_3: MOVW (SI), R9 MOVB 2(SI), R10 MOVW R9, (DI) MOVB R10, 2(DI) JMP emit_literal_done_match_emit_encodeBlockAsmAvx emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_5through7: MOVL (SI), R9 MOVL -4(SI)(R8*1), R10 MOVL R9, (DI) MOVL R10, -4(DI)(R8*1) JMP emit_literal_done_match_emit_encodeBlockAsmAvx emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_8: MOVQ (SI), R9 MOVQ R9, (DI) JMP emit_literal_done_match_emit_encodeBlockAsmAvx emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_9through16: MOVQ (SI), R9 MOVQ -8(SI)(R8*1), R10 MOVQ R9, (DI) MOVQ R10, -8(DI)(R8*1) JMP emit_literal_done_match_emit_encodeBlockAsmAvx emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_17through32: MOVOU (SI), X0 MOVOU -16(SI)(R8*1), X1 MOVOU X0, (DI) MOVOU X1, -16(DI)(R8*1) JMP emit_literal_done_match_emit_encodeBlockAsmAvx emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_33through64: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU -32(SI)(R8*1), X2 MOVOU -16(SI)(R8*1), X3 MOVOU X0, (DI) MOVOU X1, 16(DI) MOVOU X2, -32(DI)(R8*1) MOVOU X3, -16(DI)(R8*1) JMP emit_literal_done_match_emit_encodeBlockAsmAvx emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_65through128: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU 32(SI), X2 MOVOU 48(SI), X3 MOVOU -64(SI)(R8*1), X12 MOVOU -48(SI)(R8*1), X13 MOVOU -32(SI)(R8*1), X14 MOVOU -16(SI)(R8*1), X15 MOVOU X0, (DI) MOVOU X1, 16(DI) MOVOU X2, 32(DI) MOVOU X3, 48(DI) MOVOU X12, -64(DI)(R8*1) MOVOU X13, -48(DI)(R8*1) MOVOU X14, -32(DI)(R8*1) MOVOU X15, -16(DI)(R8*1) JMP emit_literal_done_match_emit_encodeBlockAsmAvx emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_129through256: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU 32(SI), X2 MOVOU 48(SI), X3 MOVOU 64(SI), X4 MOVOU 80(SI), X5 MOVOU 96(SI), X6 MOVOU 112(SI), X7 
MOVOU -128(SI)(R8*1), X8 MOVOU -112(SI)(R8*1), X9 MOVOU -96(SI)(R8*1), X10 MOVOU -80(SI)(R8*1), X11 MOVOU -64(SI)(R8*1), X12 MOVOU -48(SI)(R8*1), X13 MOVOU -32(SI)(R8*1), X14 MOVOU -16(SI)(R8*1), X15 MOVOU X0, (DI) MOVOU X1, 16(DI) MOVOU X2, 32(DI) MOVOU X3, 48(DI) MOVOU X4, 64(DI) MOVOU X5, 80(DI) MOVOU X6, 96(DI) MOVOU X7, 112(DI) MOVOU X8, -128(DI)(R8*1) MOVOU X9, -112(DI)(R8*1) MOVOU X10, -96(DI)(R8*1) MOVOU X11, -80(DI)(R8*1) MOVOU X12, -64(DI)(R8*1) MOVOU X13, -48(DI)(R8*1) MOVOU X14, -32(DI)(R8*1) MOVOU X15, -16(DI)(R8*1) JMP emit_literal_done_match_emit_encodeBlockAsmAvx emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_256through2048: LEAQ -256(R8), R8 MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU 32(SI), X2 MOVOU 48(SI), X3 MOVOU 64(SI), X4 MOVOU 80(SI), X5 MOVOU 96(SI), X6 MOVOU 112(SI), X7 MOVOU 128(SI), X8 MOVOU 144(SI), X9 MOVOU 160(SI), X10 MOVOU 176(SI), X11 MOVOU 192(SI), X12 MOVOU 208(SI), X13 MOVOU 224(SI), X14 MOVOU 240(SI), X15 MOVOU X0, (DI) MOVOU X1, 16(DI) MOVOU X2, 32(DI) MOVOU X3, 48(DI) MOVOU X4, 64(DI) MOVOU X5, 80(DI) MOVOU X6, 96(DI) MOVOU X7, 112(DI) MOVOU X8, 128(DI) MOVOU X9, 144(DI) MOVOU X10, 160(DI) MOVOU X11, 176(DI) MOVOU X12, 192(DI) MOVOU X13, 208(DI) MOVOU X14, 224(DI) MOVOU X15, 240(DI) CMPQ R8, $0x00000100 LEAQ 256(SI), SI LEAQ 256(DI), DI JGE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_256through2048 JMP emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_tail emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_avxUnaligned: LEAQ (SI)(R8*1), R10 MOVQ DI, R12 MOVOU -128(R10), X5 MOVOU -112(R10), X6 MOVQ $0x00000080, R9 ANDQ $0xffffffe0, DI ADDQ $0x20, DI MOVOU -96(R10), X7 MOVOU -80(R10), X8 MOVQ DI, R11 SUBQ R12, R11 MOVOU -64(R10), X9 MOVOU -48(R10), X10 SUBQ R11, R8 MOVOU -32(R10), X11 MOVOU -16(R10), X12 VMOVDQU (SI), Y4 ADDQ R11, SI SUBQ R9, R8 emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_gobble_128_loop: VMOVDQU (SI), Y0 VMOVDQU 32(SI), Y1 VMOVDQU 64(SI), Y2 VMOVDQU 96(SI), Y3 ADDQ R9, SI 
VMOVDQA Y0, (DI) VMOVDQA Y1, 32(DI) VMOVDQA Y2, 64(DI) VMOVDQA Y3, 96(DI) ADDQ R9, DI SUBQ R9, R8 JA emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_gobble_128_loop ADDQ R9, R8 ADDQ DI, R8 VMOVDQU Y4, (R12) VZEROUPPER MOVOU X5, -128(R8) MOVOU X6, -112(R8) MOVOU X7, -96(R8) MOVOU X8, -80(R8) MOVOU X9, -64(R8) MOVOU X10, -48(R8) MOVOU X11, -32(R8) MOVOU X12, -16(R8) JMP emit_literal_done_match_emit_encodeBlockAsmAvx MOVQ R9, DI emit_literal_done_match_emit_encodeBlockAsmAvx: MOVQ DI, dst_base+0(FP) emit_literal_skip_match_emit_encodeBlockAsmAvx: NOP match_nolit_loop_encodeBlockAsmAvx: MOVL AX, SI MOVL AX, SI SUBL BX, SI MOVL SI, 24(SP) ADDL $0x04, AX ADDL $0x04, BX MOVL 16(SP), SI SUBL AX, SI XORQ R8, R8 CMPQ SI, $0x08 JL matchlen_single_match_nolit_encodeBlockAsmAvx matchlen_loopback_match_nolit_encodeBlockAsmAvx: MOVQ (CX)(R8*1), DI XORQ (CX)(R8*1), DI TESTQ DI, DI JZ matchlen_loop_match_nolit_encodeBlockAsmAvx BSFQ DI, DI SARQ $0x03, DI LEAQ (R8)(DI*1), R8 JMP match_nolit_end_encodeBlockAsmAvx matchlen_loop_match_nolit_encodeBlockAsmAvx: LEAQ -8(SI), SI LEAQ 8(R8), R8 CMPQ SI, $0x08 JGE matchlen_loopback_match_nolit_encodeBlockAsmAvx matchlen_single_match_nolit_encodeBlockAsmAvx: TESTQ SI, SI JZ match_nolit_end_encodeBlockAsmAvx matchlen_single_loopback_match_nolit_encodeBlockAsmAvx: MOVB (CX)(R8*1), DI CMPB (CX)(R8*1), DI JNE match_nolit_end_encodeBlockAsmAvx LEAQ 1(R8), R8 DECQ SI JNZ matchlen_single_loopback_match_nolit_encodeBlockAsmAvx match_nolit_end_encodeBlockAsmAvx: MOVL 24(SP), SI ADDQ $0x04, R8 MOVQ dst_base+0(FP), DI ADDL R8, AX CMPL SI, $0x00010000 JL two_byte_offset_match_nolit_encodeBlockAsmAvx CMPL R8, $0x40 JLE four_bytes_remain_match_nolit_encodeBlockAsmAvx MOVB $0xff, (DI) MOVD SI, 1(DI) LEAQ -64(R8), R8 ADDQ $0x05, DI CMPL R8, $0x04 JL four_bytes_remain_match_nolit_encodeBlockAsmAvx emit_repeat_again_match_nolit_encodeBlockAsmAvx_emit_copy: MOVQ R8, R9 LEAQ -4(R8), R8 CMPL R9, $0x08 JLE repeat_two_match_nolit_encodeBlockAsmAvx_emit_copy 
CMPL R9, $0x0c JGE cant_repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy CMPL SI, $0x00000800 JLT repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy cant_repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy: CMPL R8, $0x00000104 JLT repeat_three_match_nolit_encodeBlockAsmAvx_emit_copy CMPL R8, $0x00010100 JLT repeat_four_match_nolit_encodeBlockAsmAvx_emit_copy CMPL R8, $0x0100ffff JLT repeat_five_match_nolit_encodeBlockAsmAvx_emit_copy LEAQ -16842747(R8), R8 MOVW $0x001d, (DI) MOVW $0xfffb, 2(DI) MOVB $0xff, 4(DI) ADDQ $0x05, DI JMP emit_repeat_again_match_nolit_encodeBlockAsmAvx_emit_copy repeat_five_match_nolit_encodeBlockAsmAvx_emit_copy: LEAQ -65536(R8), R8 MOVQ R8, SI MOVW $0x001d, (DI) MOVW R8, 2(DI) SARQ $0x10, SI MOVB SI, 4(DI) ADDQ $0x05, DI JMP match_nolit_emitcopy_end_encodeBlockAsmAvx repeat_four_match_nolit_encodeBlockAsmAvx_emit_copy: LEAQ -256(R8), R8 MOVW $0x0019, (DI) MOVW R8, 2(DI) ADDQ $0x04, DI JMP match_nolit_emitcopy_end_encodeBlockAsmAvx repeat_three_match_nolit_encodeBlockAsmAvx_emit_copy: LEAQ -4(R8), R8 MOVW $0x0015, (DI) MOVB R8, 2(DI) ADDQ $0x03, DI JMP match_nolit_emitcopy_end_encodeBlockAsmAvx repeat_two_match_nolit_encodeBlockAsmAvx_emit_copy: SHLL $0x02, R8 ORL $0x01, R8 MOVW R8, (DI) ADDQ $0x02, DI JMP match_nolit_emitcopy_end_encodeBlockAsmAvx repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy: XORQ R9, R9 LEAQ 1(R9)(R8*4), R8 MOVB SI, 1(DI) SARL $0x08, SI SHLL $0x05, SI ORL SI, R8 MOVB R8, (DI) ADDQ $0x02, DI JMP match_nolit_emitcopy_end_encodeBlockAsmAvx four_bytes_remain_match_nolit_encodeBlockAsmAvx: TESTL R8, R8 JZ match_nolit_emitcopy_end_encodeBlockAsmAvx MOVB $0x03, DL LEAQ -4(DX)(R8*4), R8 MOVB R8, (DI) MOVD SI, 1(DI) ADDQ $0x05, DI JMP match_nolit_emitcopy_end_encodeBlockAsmAvx two_byte_offset_match_nolit_encodeBlockAsmAvx: CMPL R8, $0x40 JLE two_byte_offset_short_match_nolit_encodeBlockAsmAvx MOVB $0xee, (DI) MOVW SI, 1(DI) LEAQ -60(R8), R8 ADDQ $0x03, DI 
emit_repeat_again_match_nolit_encodeBlockAsmAvx_emit_copy_short: MOVQ R8, R9 LEAQ -4(R8), R8 CMPL R9, $0x08 JLE repeat_two_match_nolit_encodeBlockAsmAvx_emit_copy_short CMPL R9, $0x0c JGE cant_repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy_short CMPL SI, $0x00000800 JLT repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy_short: CMPL R8, $0x00000104 JLT repeat_three_match_nolit_encodeBlockAsmAvx_emit_copy_short CMPL R8, $0x00010100 JLT repeat_four_match_nolit_encodeBlockAsmAvx_emit_copy_short CMPL R8, $0x0100ffff JLT repeat_five_match_nolit_encodeBlockAsmAvx_emit_copy_short LEAQ -16842747(R8), R8 MOVW $0x001d, (DI) MOVW $0xfffb, 2(DI) MOVB $0xff, 4(DI) ADDQ $0x05, DI JMP emit_repeat_again_match_nolit_encodeBlockAsmAvx_emit_copy_short repeat_five_match_nolit_encodeBlockAsmAvx_emit_copy_short: LEAQ -65536(R8), R8 MOVQ R8, SI MOVW $0x001d, (DI) MOVW R8, 2(DI) SARQ $0x10, SI MOVB SI, 4(DI) ADDQ $0x05, DI JMP match_nolit_emitcopy_end_encodeBlockAsmAvx repeat_four_match_nolit_encodeBlockAsmAvx_emit_copy_short: LEAQ -256(R8), R8 MOVW $0x0019, (DI) MOVW R8, 2(DI) ADDQ $0x04, DI JMP match_nolit_emitcopy_end_encodeBlockAsmAvx repeat_three_match_nolit_encodeBlockAsmAvx_emit_copy_short: LEAQ -4(R8), R8 MOVW $0x0015, (DI) MOVB R8, 2(DI) ADDQ $0x03, DI JMP match_nolit_emitcopy_end_encodeBlockAsmAvx repeat_two_match_nolit_encodeBlockAsmAvx_emit_copy_short: SHLL $0x02, R8 ORL $0x01, R8 MOVW R8, (DI) ADDQ $0x02, DI JMP match_nolit_emitcopy_end_encodeBlockAsmAvx repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy_short: XORQ R9, R9 LEAQ 1(R9)(R8*4), R8 MOVB SI, 1(DI) SARL $0x08, SI SHLL $0x05, SI ORL SI, R8 MOVB R8, (DI) ADDQ $0x02, DI JMP match_nolit_emitcopy_end_encodeBlockAsmAvx two_byte_offset_short_match_nolit_encodeBlockAsmAvx: CMPL R8, $0x0c JGE emit_copy_three_match_nolit_encodeBlockAsmAvx CMPL SI, $0x00000800 JGE emit_copy_three_match_nolit_encodeBlockAsmAvx MOVB $0x01, DL LEAQ 
-16(DX)(R8*4), R8 MOVB SI, 1(DI) SHRL $0x08, SI SHLL $0x05, SI ORL SI, R8 MOVB R8, (DI) ADDQ $0x02, DI JMP match_nolit_emitcopy_end_encodeBlockAsmAvx emit_copy_three_match_nolit_encodeBlockAsmAvx: MOVB $0x02, DL LEAQ -4(DX)(R8*4), R8 MOVB R8, (DI) MOVW SI, 1(DI) ADDQ $0x03, DI match_nolit_emitcopy_end_encodeBlockAsmAvx: MOVQ DI, dst_base+0(FP) MOVL AX, 20(SP) CMPL AX, 16(SP) JGE emit_remainder_encodeBlockAsmAvx CMPQ DI, (SP) JL match_nolit_dst_ok_encodeBlockAsmAvx MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeBlockAsmAvx: MOVQ -2(CX)(AX*1), SI MOVQ $0x0000cf1bbcdcbf9b, DI MOVQ SI, R8 SHRQ $0x10, SI MOVQ SI, R9 SHLQ $0x10, R8 IMULQ DI, R8 SHRQ $0x30, R8 SHLQ $0x10, R9 IMULQ DI, R9 SHRQ $0x30, R9 MOVL 32(SP)(R8*1), DI MOVL 32(SP)(R9*1), DI LEAQ -2(AX), DI MOVL DI, 32(SP)(R8*1) MOVL AX, 32(SP)(R9*1) CMPL (CX)(R9*1), SI JEQ match_nolit_loop_encodeBlockAsmAvx INCL AX JMP search_loop_encodeBlockAsmAvx emit_remainder_encodeBlockAsmAvx: MOVQ src_len+32(FP), AX SUBL 20(SP), AX MOVQ dst_base+0(FP), DX LEAQ (DX)(AX*1), DX CMPQ DX, (SP) JL emit_remainder_ok_encodeBlockAsmAvx MOVQ $0x00000000, ret+48(FP) RET emit_remainder_ok_encodeBlockAsmAvx: MOVQ src_len+32(FP), AX MOVL 20(SP), DX CMPL DX, AX JEQ emit_literal_skip_emit_remainder_encodeBlockAsmAvx MOVL AX, BX MOVL AX, 20(SP) LEAQ (CX)(DX*1), AX SUBL DX, BX MOVQ dst_base+0(FP), CX MOVQ BX, DX SUBL $0x01, DX JC emit_literal_done_emit_remainder_encodeBlockAsmAvx CMPL DX, $0x3c JLT one_byte_emit_remainder_encodeBlockAsmAvx CMPL DX, $0x00000100 JLT two_bytes_emit_remainder_encodeBlockAsmAvx CMPL DX, $0x00010000 JLT three_bytes_emit_remainder_encodeBlockAsmAvx CMPL DX, $0x01000000 JLT four_bytes_emit_remainder_encodeBlockAsmAvx MOVB $0xfc, (CX) MOVL DX, 1(CX) ADDQ $0x05, CX JMP memmove_emit_remainder_encodeBlockAsmAvx four_bytes_emit_remainder_encodeBlockAsmAvx: MOVQ DX, SI SHRL $0x10, SI MOVB $0xf8, (CX) MOVW DX, 1(CX) MOVB SI, 3(CX) ADDQ $0x04, CX JMP memmove_emit_remainder_encodeBlockAsmAvx 
three_bytes_emit_remainder_encodeBlockAsmAvx: MOVB $0xf4, (CX) MOVW DX, 1(CX) ADDQ $0x03, CX JMP memmove_emit_remainder_encodeBlockAsmAvx two_bytes_emit_remainder_encodeBlockAsmAvx: MOVB $0xf0, (CX) MOVB DL, 1(CX) ADDQ $0x02, CX JMP memmove_emit_remainder_encodeBlockAsmAvx one_byte_emit_remainder_encodeBlockAsmAvx: SHLB $0x02, DL MOVB DL, (CX) ADDQ $0x01, CX memmove_emit_remainder_encodeBlockAsmAvx: LEAQ (CX)(BX*1), DX NOP emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_tail: TESTQ BX, BX JEQ emit_literal_done_emit_remainder_encodeBlockAsmAvx CMPQ BX, $0x02 JBE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_1or2 CMPQ BX, $0x04 JB emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_3 JBE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_4 CMPQ BX, $0x08 JB emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_5through7 JE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_8 CMPQ BX, $0x10 JBE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_9through16 CMPQ BX, $0x20 JBE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_17through32 CMPQ BX, $0x40 JBE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_33through64 CMPQ BX, $0x80 JBE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_65through128 CMPQ BX, $0x00000100 JBE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_129through256 JMP emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_avxUnaligned emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_1or2: MOVB (AX), DL MOVB -1(AX)(BX*1), SI MOVB DL, (CX) MOVB SI, -1(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsmAvx emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_4: MOVL (AX), DX MOVL DX, (CX) JMP emit_literal_done_emit_remainder_encodeBlockAsmAvx emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_3: MOVW (AX), DX MOVB 2(AX), SI MOVW DX, (CX) MOVB SI, 2(CX) JMP 
emit_literal_done_emit_remainder_encodeBlockAsmAvx emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_5through7: MOVL (AX), DX MOVL -4(AX)(BX*1), SI MOVL DX, (CX) MOVL SI, -4(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsmAvx emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_8: MOVQ (AX), DX MOVQ DX, (CX) JMP emit_literal_done_emit_remainder_encodeBlockAsmAvx emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_9through16: MOVQ (AX), DX MOVQ -8(AX)(BX*1), SI MOVQ DX, (CX) MOVQ SI, -8(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsmAvx emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_17through32: MOVOU (AX), X0 MOVOU -16(AX)(BX*1), X1 MOVOU X0, (CX) MOVOU X1, -16(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsmAvx emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_33through64: MOVOU (AX), X0 MOVOU 16(AX), X1 MOVOU -32(AX)(BX*1), X2 MOVOU -16(AX)(BX*1), X3 MOVOU X0, (CX) MOVOU X1, 16(CX) MOVOU X2, -32(CX)(BX*1) MOVOU X3, -16(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsmAvx emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_65through128: MOVOU (AX), X0 MOVOU 16(AX), X1 MOVOU 32(AX), X2 MOVOU 48(AX), X3 MOVOU -64(AX)(BX*1), X12 MOVOU -48(AX)(BX*1), X13 MOVOU -32(AX)(BX*1), X14 MOVOU -16(AX)(BX*1), X15 MOVOU X0, (CX) MOVOU X1, 16(CX) MOVOU X2, 32(CX) MOVOU X3, 48(CX) MOVOU X12, -64(CX)(BX*1) MOVOU X13, -48(CX)(BX*1) MOVOU X14, -32(CX)(BX*1) MOVOU X15, -16(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsmAvx emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_129through256: MOVOU (AX), X0 MOVOU 16(AX), X1 MOVOU 32(AX), X2 MOVOU 48(AX), X3 MOVOU 64(AX), X4 MOVOU 80(AX), X5 MOVOU 96(AX), X6 MOVOU 112(AX), X7 MOVOU -128(AX)(BX*1), X8 MOVOU -112(AX)(BX*1), X9 MOVOU -96(AX)(BX*1), X10 MOVOU -80(AX)(BX*1), X11 MOVOU -64(AX)(BX*1), X12 MOVOU -48(AX)(BX*1), X13 MOVOU -32(AX)(BX*1), X14 MOVOU -16(AX)(BX*1), X15 MOVOU X0, (CX) 
MOVOU X1, 16(CX) MOVOU X2, 32(CX) MOVOU X3, 48(CX) MOVOU X4, 64(CX) MOVOU X5, 80(CX) MOVOU X6, 96(CX) MOVOU X7, 112(CX) MOVOU X8, -128(CX)(BX*1) MOVOU X9, -112(CX)(BX*1) MOVOU X10, -96(CX)(BX*1) MOVOU X11, -80(CX)(BX*1) MOVOU X12, -64(CX)(BX*1) MOVOU X13, -48(CX)(BX*1) MOVOU X14, -32(CX)(BX*1) MOVOU X15, -16(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsmAvx emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_256through2048: LEAQ -256(BX), BX MOVOU (AX), X0 MOVOU 16(AX), X1 MOVOU 32(AX), X2 MOVOU 48(AX), X3 MOVOU 64(AX), X4 MOVOU 80(AX), X5 MOVOU 96(AX), X6 MOVOU 112(AX), X7 MOVOU 128(AX), X8 MOVOU 144(AX), X9 MOVOU 160(AX), X10 MOVOU 176(AX), X11 MOVOU 192(AX), X12 MOVOU 208(AX), X13 MOVOU 224(AX), X14 MOVOU 240(AX), X15 MOVOU X0, (CX) MOVOU X1, 16(CX) MOVOU X2, 32(CX) MOVOU X3, 48(CX) MOVOU X4, 64(CX) MOVOU X5, 80(CX) MOVOU X6, 96(CX) MOVOU X7, 112(CX) MOVOU X8, 128(CX) MOVOU X9, 144(CX) MOVOU X10, 160(CX) MOVOU X11, 176(CX) MOVOU X12, 192(CX) MOVOU X13, 208(CX) MOVOU X14, 224(CX) MOVOU X15, 240(CX) CMPQ BX, $0x00000100 LEAQ 256(AX), AX LEAQ 256(CX), CX JGE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_256through2048 JMP emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_tail emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_avxUnaligned: LEAQ (AX)(BX*1), SI MOVQ CX, R8 MOVOU -128(SI), X5 MOVOU -112(SI), X6 MOVQ $0x00000080, DX ANDQ $0xffffffe0, CX ADDQ $0x20, CX MOVOU -96(SI), X7 MOVOU -80(SI), X8 MOVQ CX, DI SUBQ R8, DI MOVOU -64(SI), X9 MOVOU -48(SI), X10 SUBQ DI, BX MOVOU -32(SI), X11 MOVOU -16(SI), X12 VMOVDQU (AX), Y4 ADDQ DI, AX SUBQ DX, BX emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_gobble_128_loop: VMOVDQU (AX), Y0 VMOVDQU 32(AX), Y1 VMOVDQU 64(AX), Y2 VMOVDQU 96(AX), Y3 ADDQ DX, AX VMOVDQA Y0, (CX) VMOVDQA Y1, 32(CX) VMOVDQA Y2, 64(CX) VMOVDQA Y3, 96(CX) ADDQ DX, CX SUBQ DX, BX JA emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_gobble_128_loop ADDQ DX, BX ADDQ CX, BX 
VMOVDQU Y4, (R8) VZEROUPPER MOVOU X5, -128(BX) MOVOU X6, -112(BX) MOVOU X7, -96(BX) MOVOU X8, -80(BX) MOVOU X9, -64(BX) MOVOU X10, -48(BX) MOVOU X11, -32(BX) MOVOU X12, -16(BX) JMP emit_literal_done_emit_remainder_encodeBlockAsmAvx MOVQ DX, CX emit_literal_done_emit_remainder_encodeBlockAsmAvx: MOVQ CX, dst_base+0(FP) emit_literal_skip_emit_remainder_encodeBlockAsmAvx: MOVQ 8(SP), AX SUBQ dst_base+0(FP), AX MOVQ AX, ret+48(FP) RET // func encodeBlockAsm14BAvx(dst []byte, src []byte) int // Requires: AVX, SSE2 TEXT ·encodeBlockAsm14BAvx(SB), $16416-56 MOVQ $0x00000080, AX LEAQ 32(SP), CX PXOR X0, X0 zero_loop_encodeBlockAsm14BAvx: MOVOU X0, (CX) MOVOU X0, 16(CX) MOVOU X0, 32(CX) MOVOU X0, 48(CX) MOVOU X0, 64(CX) MOVOU X0, 80(CX) MOVOU X0, 96(CX) MOVOU X0, 112(CX) ADDQ $0x80, CX DECQ AX JNZ zero_loop_encodeBlockAsm14BAvx MOVL AX, 20(SP) MOVQ src_len+32(FP), AX LEAQ -5(AX), CX LEAQ -8(AX), BX SHRQ $0x05, AX SUBL AX, CX MOVL BX, 16(SP) MOVQ dst_base+0(FP), AX MOVQ AX, 8(SP) LEAQ (AX)(CX*1), CX MOVQ CX, (SP) MOVL $0x00000001, AX MOVL AX, 24(SP) MOVQ src_base+24(FP), CX search_loop_encodeBlockAsm14BAvx: MOVQ (CX)(AX*1), SI MOVL AX, BX SUBL 20(SP), BX SHRL $0x05, BX LEAQ 4(AX)(BX*1), BX MOVL 16(SP), DI CMPL BX, DI JGT emit_remainder_encodeBlockAsm14BAvx MOVL BX, 28(SP) MOVQ $0x0000cf1bbcdcbf9b, BX MOVQ SI, R8 MOVQ SI, R9 SHRQ $0x08, R9 SHLQ $0x10, R8 IMULQ BX, R8 SHRQ $0x32, R8 SHLQ $0x10, R9 IMULQ BX, R9 SHRQ $0x32, R9 MOVL 32(SP)(R8*1), BX MOVL 32(SP)(R9*1), DI MOVL AX, 32(SP)(R8*1) LEAL 1(AX), R8 MOVL R8, 32(SP)(R9*1) MOVL AX, R8 SUBL 24(SP), R8 MOVL 1(CX)(R8*1), R10 MOVQ SI, R9 SHLQ $0x08, R9 CMPL R9, R10 JNE no_repeat_found_encodeBlockAsm14BAvx LEAQ 1(AX), SI MOVL 20(SP), BX TESTL R8, R8 JZ repeat_extend_back_end_encodeBlockAsm14BAvx repeat_extend_back_loop_encodeBlockAsm14BAvx: CMPL SI, BX JG repeat_extend_back_end_encodeBlockAsm14BAvx MOVB -1(CX)(R8*1), DL MOVB -1(CX)(SI*1), DI CMPB DL, DI JNE repeat_extend_back_end_encodeBlockAsm14BAvx LEAQ -1(SI), SI DECL R8 JZ 
repeat_extend_back_end_encodeBlockAsm14BAvx JMP repeat_extend_back_loop_encodeBlockAsm14BAvx repeat_extend_back_end_encodeBlockAsm14BAvx: MOVL 20(SP), BX CMPL BX, SI JEQ emit_literal_skip_repeat_emit_encodeBlockAsm14BAvx MOVL SI, DI MOVL SI, 20(SP) LEAQ (CX)(BX*1), R8 SUBL BX, DI MOVQ dst_base+0(FP), BX MOVQ DI, R9 SUBL $0x01, R9 JC emit_literal_done_repeat_emit_encodeBlockAsm14BAvx CMPL R9, $0x3c JLT one_byte_repeat_emit_encodeBlockAsm14BAvx CMPL R9, $0x00000100 JLT two_bytes_repeat_emit_encodeBlockAsm14BAvx CMPL R9, $0x00010000 JLT three_bytes_repeat_emit_encodeBlockAsm14BAvx CMPL R9, $0x01000000 JLT four_bytes_repeat_emit_encodeBlockAsm14BAvx MOVB $0xfc, (BX) MOVL R9, 1(BX) ADDQ $0x05, BX JMP memmove_repeat_emit_encodeBlockAsm14BAvx four_bytes_repeat_emit_encodeBlockAsm14BAvx: MOVQ R9, R10 SHRL $0x10, R10 MOVB $0xf8, (BX) MOVW R9, 1(BX) MOVB R10, 3(BX) ADDQ $0x04, BX JMP memmove_repeat_emit_encodeBlockAsm14BAvx three_bytes_repeat_emit_encodeBlockAsm14BAvx: MOVB $0xf4, (BX) MOVW R9, 1(BX) ADDQ $0x03, BX JMP memmove_repeat_emit_encodeBlockAsm14BAvx two_bytes_repeat_emit_encodeBlockAsm14BAvx: MOVB $0xf0, (BX) MOVB R9, 1(BX) ADDQ $0x02, BX JMP memmove_repeat_emit_encodeBlockAsm14BAvx one_byte_repeat_emit_encodeBlockAsm14BAvx: SHLB $0x02, R9 MOVB R9, (BX) ADDQ $0x01, BX memmove_repeat_emit_encodeBlockAsm14BAvx: LEAQ (BX)(DI*1), R9 NOP emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_tail: TESTQ DI, DI JEQ emit_literal_done_repeat_emit_encodeBlockAsm14BAvx CMPQ DI, $0x02 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_1or2 CMPQ DI, $0x04 JB emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_3 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_4 CMPQ DI, $0x08 JB emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_5through7 JE emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_8 CMPQ DI, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_9through16 CMPQ DI, $0x20 JBE 
emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_17through32 CMPQ DI, $0x40 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_33through64 CMPQ DI, $0x80 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_65through128 CMPQ DI, $0x00000100 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_129through256 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_avxUnaligned emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_1or2: MOVB (R8), R9 MOVB -1(R8)(DI*1), R10 MOVB R9, (BX) MOVB R10, -1(BX)(DI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsm14BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_4: MOVL (R8), R9 MOVL R9, (BX) JMP emit_literal_done_repeat_emit_encodeBlockAsm14BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_3: MOVW (R8), R9 MOVB 2(R8), R10 MOVW R9, (BX) MOVB R10, 2(BX) JMP emit_literal_done_repeat_emit_encodeBlockAsm14BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_5through7: MOVL (R8), R9 MOVL -4(R8)(DI*1), R10 MOVL R9, (BX) MOVL R10, -4(BX)(DI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsm14BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_8: MOVQ (R8), R9 MOVQ R9, (BX) JMP emit_literal_done_repeat_emit_encodeBlockAsm14BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_9through16: MOVQ (R8), R9 MOVQ -8(R8)(DI*1), R10 MOVQ R9, (BX) MOVQ R10, -8(BX)(DI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsm14BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_17through32: MOVOU (R8), X0 MOVOU -16(R8)(DI*1), X1 MOVOU X0, (BX) MOVOU X1, -16(BX)(DI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsm14BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_33through64: MOVOU (R8), X0 MOVOU 16(R8), X1 MOVOU -32(R8)(DI*1), X2 MOVOU -16(R8)(DI*1), X3 MOVOU X0, (BX) MOVOU X1, 16(BX) MOVOU X2, -32(BX)(DI*1) MOVOU X3, -16(BX)(DI*1) JMP 
emit_literal_done_repeat_emit_encodeBlockAsm14BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_65through128: MOVOU (R8), X0 MOVOU 16(R8), X1 MOVOU 32(R8), X2 MOVOU 48(R8), X3 MOVOU -64(R8)(DI*1), X12 MOVOU -48(R8)(DI*1), X13 MOVOU -32(R8)(DI*1), X14 MOVOU -16(R8)(DI*1), X15 MOVOU X0, (BX) MOVOU X1, 16(BX) MOVOU X2, 32(BX) MOVOU X3, 48(BX) MOVOU X12, -64(BX)(DI*1) MOVOU X13, -48(BX)(DI*1) MOVOU X14, -32(BX)(DI*1) MOVOU X15, -16(BX)(DI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsm14BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_129through256: MOVOU (R8), X0 MOVOU 16(R8), X1 MOVOU 32(R8), X2 MOVOU 48(R8), X3 MOVOU 64(R8), X4 MOVOU 80(R8), X5 MOVOU 96(R8), X6 MOVOU 112(R8), X7 MOVOU -128(R8)(DI*1), X8 MOVOU -112(R8)(DI*1), X9 MOVOU -96(R8)(DI*1), X10 MOVOU -80(R8)(DI*1), X11 MOVOU -64(R8)(DI*1), X12 MOVOU -48(R8)(DI*1), X13 MOVOU -32(R8)(DI*1), X14 MOVOU -16(R8)(DI*1), X15 MOVOU X0, (BX) MOVOU X1, 16(BX) MOVOU X2, 32(BX) MOVOU X3, 48(BX) MOVOU X4, 64(BX) MOVOU X5, 80(BX) MOVOU X6, 96(BX) MOVOU X7, 112(BX) MOVOU X8, -128(BX)(DI*1) MOVOU X9, -112(BX)(DI*1) MOVOU X10, -96(BX)(DI*1) MOVOU X11, -80(BX)(DI*1) MOVOU X12, -64(BX)(DI*1) MOVOU X13, -48(BX)(DI*1) MOVOU X14, -32(BX)(DI*1) MOVOU X15, -16(BX)(DI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsm14BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_256through2048: LEAQ -256(DI), DI MOVOU (R8), X0 MOVOU 16(R8), X1 MOVOU 32(R8), X2 MOVOU 48(R8), X3 MOVOU 64(R8), X4 MOVOU 80(R8), X5 MOVOU 96(R8), X6 MOVOU 112(R8), X7 MOVOU 128(R8), X8 MOVOU 144(R8), X9 MOVOU 160(R8), X10 MOVOU 176(R8), X11 MOVOU 192(R8), X12 MOVOU 208(R8), X13 MOVOU 224(R8), X14 MOVOU 240(R8), X15 MOVOU X0, (BX) MOVOU X1, 16(BX) MOVOU X2, 32(BX) MOVOU X3, 48(BX) MOVOU X4, 64(BX) MOVOU X5, 80(BX) MOVOU X6, 96(BX) MOVOU X7, 112(BX) MOVOU X8, 128(BX) MOVOU X9, 144(BX) MOVOU X10, 160(BX) MOVOU X11, 176(BX) MOVOU X12, 192(BX) MOVOU X13, 208(BX) MOVOU X14, 224(BX) MOVOU X15, 240(BX) CMPQ DI, 
$0x00000100 LEAQ 256(R8), R8 LEAQ 256(BX), BX JGE emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_256through2048 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_tail emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_avxUnaligned: LEAQ (R8)(DI*1), R10 MOVQ BX, R12 MOVOU -128(R10), X5 MOVOU -112(R10), X6 MOVQ $0x00000080, R9 ANDQ $0xffffffe0, BX ADDQ $0x20, BX MOVOU -96(R10), X7 MOVOU -80(R10), X8 MOVQ BX, R11 SUBQ R12, R11 MOVOU -64(R10), X9 MOVOU -48(R10), X10 SUBQ R11, DI MOVOU -32(R10), X11 MOVOU -16(R10), X12 VMOVDQU (R8), Y4 ADDQ R11, R8 SUBQ R9, DI emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_gobble_128_loop: VMOVDQU (R8), Y0 VMOVDQU 32(R8), Y1 VMOVDQU 64(R8), Y2 VMOVDQU 96(R8), Y3 ADDQ R9, R8 VMOVDQA Y0, (BX) VMOVDQA Y1, 32(BX) VMOVDQA Y2, 64(BX) VMOVDQA Y3, 96(BX) ADDQ R9, BX SUBQ R9, DI JA emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_gobble_128_loop ADDQ R9, DI ADDQ BX, DI VMOVDQU Y4, (R12) VZEROUPPER MOVOU X5, -128(DI) MOVOU X6, -112(DI) MOVOU X7, -96(DI) MOVOU X8, -80(DI) MOVOU X9, -64(DI) MOVOU X10, -48(DI) MOVOU X11, -32(DI) MOVOU X12, -16(DI) JMP emit_literal_done_repeat_emit_encodeBlockAsm14BAvx MOVQ R9, BX emit_literal_done_repeat_emit_encodeBlockAsm14BAvx: MOVQ BX, dst_base+0(FP) emit_literal_skip_repeat_emit_encodeBlockAsm14BAvx: ADDL $0x05, AX MOVL AX, BX SUBL 24(SP), BX MOVL 16(SP), BX SUBL AX, BX XORQ R8, R8 CMPQ BX, $0x08 JL matchlen_single_repeat_extend matchlen_loopback_repeat_extend: MOVQ (CX)(R8*1), DI XORQ (CX)(R8*1), DI TESTQ DI, DI JZ matchlen_loop_repeat_extend BSFQ DI, DI SARQ $0x03, DI LEAQ (R8)(DI*1), R8 JMP repeat_extend_forward_end_encodeBlockAsm14BAvx matchlen_loop_repeat_extend: LEAQ -8(BX), BX LEAQ 8(R8), R8 CMPQ BX, $0x08 JGE matchlen_loopback_repeat_extend matchlen_single_repeat_extend: TESTQ BX, BX JZ repeat_extend_forward_end_encodeBlockAsm14BAvx matchlen_single_loopback_repeat_extend: MOVB (CX)(R8*1), DI CMPB (CX)(R8*1), DI JNE 
repeat_extend_forward_end_encodeBlockAsm14BAvx LEAQ 1(R8), R8 DECQ BX JNZ matchlen_single_loopback_repeat_extend repeat_extend_forward_end_encodeBlockAsm14BAvx: ADDL R8, AX MOVL AX, BX SUBL SI, BX MOVL 24(SP), SI MOVQ dst_base+0(FP), DI MOVL 20(SP), R8 TESTL R8, R8 JZ repeat_as_copy_encodeBlockAsm14BAvx emit_repeat_again_match_repeat_: MOVQ BX, R8 LEAQ -4(BX), BX CMPL R8, $0x08 JLE repeat_two_match_repeat_ CMPL R8, $0x0c JGE cant_repeat_two_offset_match_repeat_ CMPL SI, $0x00000800 JLT repeat_two_offset_match_repeat_ cant_repeat_two_offset_match_repeat_: CMPL BX, $0x00000104 JLT repeat_three_match_repeat_ CMPL BX, $0x00010100 JLT repeat_four_match_repeat_ CMPL BX, $0x0100ffff JLT repeat_five_match_repeat_ LEAQ -16842747(BX), BX MOVW $0x001d, (DI) MOVW $0xfffb, 2(DI) MOVB $0xff, 4(DI) ADDQ $0x05, DI JMP emit_repeat_again_match_repeat_ repeat_five_match_repeat_: LEAQ -65536(BX), BX MOVQ BX, SI MOVW $0x001d, (DI) MOVW BX, 2(DI) SARQ $0x10, SI MOVB SI, 4(DI) ADDQ $0x05, DI JMP repeat_end_emit_encodeBlockAsm14BAvx repeat_four_match_repeat_: LEAQ -256(BX), BX MOVW $0x0019, (DI) MOVW BX, 2(DI) ADDQ $0x04, DI JMP repeat_end_emit_encodeBlockAsm14BAvx repeat_three_match_repeat_: LEAQ -4(BX), BX MOVW $0x0015, (DI) MOVB BL, 2(DI) ADDQ $0x03, DI JMP repeat_end_emit_encodeBlockAsm14BAvx repeat_two_match_repeat_: SHLL $0x02, BX ORL $0x01, BX MOVW BX, (DI) ADDQ $0x02, DI JMP repeat_end_emit_encodeBlockAsm14BAvx repeat_two_offset_match_repeat_: XORQ R8, R8 LEAQ 1(R8)(BX*4), BX MOVB SI, 1(DI) SARL $0x08, SI SHLL $0x05, SI ORL SI, BX MOVB BL, (DI) ADDQ $0x02, DI JMP repeat_end_emit_encodeBlockAsm14BAvx repeat_as_copy_encodeBlockAsm14BAvx: CMPL SI, $0x00010000 JL two_byte_offset_repeat_as_copy_encodeBlockAsm14BAvx CMPL BX, $0x40 JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm14BAvx MOVB $0xff, (DI) MOVD SI, 1(DI) LEAQ -64(BX), BX ADDQ $0x05, DI CMPL BX, $0x04 JL four_bytes_remain_repeat_as_copy_encodeBlockAsm14BAvx emit_repeat_again_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy: 
MOVQ BX, R8 LEAQ -4(BX), BX CMPL R8, $0x08 JLE repeat_two_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy CMPL R8, $0x0c JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy CMPL SI, $0x00000800 JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy: CMPL BX, $0x00000104 JLT repeat_three_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy CMPL BX, $0x00010100 JLT repeat_four_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy CMPL BX, $0x0100ffff JLT repeat_five_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy LEAQ -16842747(BX), BX MOVW $0x001d, (DI) MOVW $0xfffb, 2(DI) MOVB $0xff, 4(DI) ADDQ $0x05, DI JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy repeat_five_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy: LEAQ -65536(BX), BX MOVQ BX, SI MOVW $0x001d, (DI) MOVW BX, 2(DI) SARQ $0x10, SI MOVB SI, 4(DI) ADDQ $0x05, DI JMP repeat_end_emit_encodeBlockAsm14BAvx repeat_four_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy: LEAQ -256(BX), BX MOVW $0x0019, (DI) MOVW BX, 2(DI) ADDQ $0x04, DI JMP repeat_end_emit_encodeBlockAsm14BAvx repeat_three_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy: LEAQ -4(BX), BX MOVW $0x0015, (DI) MOVB BL, 2(DI) ADDQ $0x03, DI JMP repeat_end_emit_encodeBlockAsm14BAvx repeat_two_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy: SHLL $0x02, BX ORL $0x01, BX MOVW BX, (DI) ADDQ $0x02, DI JMP repeat_end_emit_encodeBlockAsm14BAvx repeat_two_offset_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy: XORQ R8, R8 LEAQ 1(R8)(BX*4), BX MOVB SI, 1(DI) SARL $0x08, SI SHLL $0x05, SI ORL SI, BX MOVB BL, (DI) ADDQ $0x02, DI JMP repeat_end_emit_encodeBlockAsm14BAvx four_bytes_remain_repeat_as_copy_encodeBlockAsm14BAvx: TESTL BX, BX JZ repeat_end_emit_encodeBlockAsm14BAvx MOVB $0x03, DL LEAQ -4(DX)(BX*4), BX MOVB BL, (DI) MOVD SI, 1(DI) ADDQ $0x05, DI JMP repeat_end_emit_encodeBlockAsm14BAvx two_byte_offset_repeat_as_copy_encodeBlockAsm14BAvx: CMPL BX, $0x40 JLE 
two_byte_offset_short_repeat_as_copy_encodeBlockAsm14BAvx MOVB $0xee, (DI) MOVW SI, 1(DI) LEAQ -60(BX), BX ADDQ $0x03, DI emit_repeat_again_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short: MOVQ BX, R8 LEAQ -4(BX), BX CMPL R8, $0x08 JLE repeat_two_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short CMPL R8, $0x0c JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short CMPL SI, $0x00000800 JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short: CMPL BX, $0x00000104 JLT repeat_three_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short CMPL BX, $0x00010100 JLT repeat_four_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short CMPL BX, $0x0100ffff JLT repeat_five_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short LEAQ -16842747(BX), BX MOVW $0x001d, (DI) MOVW $0xfffb, 2(DI) MOVB $0xff, 4(DI) ADDQ $0x05, DI JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short repeat_five_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short: LEAQ -65536(BX), BX MOVQ BX, SI MOVW $0x001d, (DI) MOVW BX, 2(DI) SARQ $0x10, SI MOVB SI, 4(DI) ADDQ $0x05, DI JMP repeat_end_emit_encodeBlockAsm14BAvx repeat_four_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short: LEAQ -256(BX), BX MOVW $0x0019, (DI) MOVW BX, 2(DI) ADDQ $0x04, DI JMP repeat_end_emit_encodeBlockAsm14BAvx repeat_three_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short: LEAQ -4(BX), BX MOVW $0x0015, (DI) MOVB BL, 2(DI) ADDQ $0x03, DI JMP repeat_end_emit_encodeBlockAsm14BAvx repeat_two_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short: SHLL $0x02, BX ORL $0x01, BX MOVW BX, (DI) ADDQ $0x02, DI JMP repeat_end_emit_encodeBlockAsm14BAvx repeat_two_offset_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short: XORQ R8, R8 LEAQ 1(R8)(BX*4), BX MOVB SI, 1(DI) SARL $0x08, SI SHLL $0x05, SI ORL SI, BX MOVB BL, (DI) ADDQ $0x02, DI JMP repeat_end_emit_encodeBlockAsm14BAvx 
two_byte_offset_short_repeat_as_copy_encodeBlockAsm14BAvx: CMPL BX, $0x0c JGE emit_copy_three_repeat_as_copy_encodeBlockAsm14BAvx CMPL SI, $0x00000800 JGE emit_copy_three_repeat_as_copy_encodeBlockAsm14BAvx MOVB $0x01, DL LEAQ -16(DX)(BX*4), BX MOVB SI, 1(DI) SHRL $0x08, SI SHLL $0x05, SI ORL SI, BX MOVB BL, (DI) ADDQ $0x02, DI JMP repeat_end_emit_encodeBlockAsm14BAvx emit_copy_three_repeat_as_copy_encodeBlockAsm14BAvx: MOVB $0x02, DL LEAQ -4(DX)(BX*4), BX MOVB BL, (DI) MOVW SI, 1(DI) ADDQ $0x03, DI repeat_end_emit_encodeBlockAsm14BAvx: MOVQ DI, dst_base+0(FP) MOVL 16(SP), BX CMPL AX, BX JGT emit_remainder_encodeBlockAsm14BAvx JMP search_loop_encodeBlockAsm14BAvx no_repeat_found_encodeBlockAsm14BAvx: MOVQ $0x0000cf1bbcdcbf9b, R9 MOVQ SI, R8 SHRQ $0x10, R8 SHLQ $0x10, R8 IMULQ R9, R8 SHRQ $0x32, R8 CMPL (CX)(BX*1), SI SHRQ $0x08, SI JEQ candidate_match_encodeBlockAsm14BAvx MOVL 32(SP)(R8*1), BX CMPL (CX)(DI*1), SI JEQ candidate2_match_encodeBlockAsm14BAvx LEAQ 2(AX), DI MOVL DI, 32(SP)(R8*1) SHRQ $0x08, SI CMPL (CX)(BX*1), SI JEQ candidate3_match_encodeBlockAsm14BAvx MOVL 28(SP), AX JMP search_loop_encodeBlockAsm14BAvx candidate3_match_encodeBlockAsm14BAvx: ADDL $0x02, AX JMP candidate_match_encodeBlockAsm14BAvx candidate2_match_encodeBlockAsm14BAvx: LEAQ -2(AX), BX MOVL BX, 32(SP)(R8*1) INCL AX MOVL DI, BX candidate_match_encodeBlockAsm14BAvx: MOVL 20(SP), SI TESTL BX, BX JZ match_extend_back_end_encodeBlockAsm14BAvx match_extend_back_loop_encodeBlockAsm14BAvx: CMPL AX, SI JG match_extend_back_end_encodeBlockAsm14BAvx MOVB -1(CX)(BX*1), DL MOVB -1(CX)(AX*1), DI CMPB DL, DI JNE match_extend_back_end_encodeBlockAsm14BAvx LEAL -1(AX), AX DECL BX JZ match_extend_back_end_encodeBlockAsm14BAvx JMP match_extend_back_loop_encodeBlockAsm14BAvx match_extend_back_end_encodeBlockAsm14BAvx: MOVL AX, SI SUBL 20(SP), SI LEAQ dst_base+0(FP)(SI*1), SI CMPQ SI, (SP) JL match_dst_size_check_encodeBlockAsm14BAvx MOVQ $0x00000000, ret+48(FP) RET 
match_dst_size_check_encodeBlockAsm14BAvx: MOVL BX, SI MOVL 20(SP), DI CMPL DI, SI JEQ emit_literal_skip_match_emit_encodeBlockAsm14BAvx MOVL SI, R8 MOVL SI, 20(SP) LEAQ (CX)(DI*1), SI SUBL DI, R8 MOVQ dst_base+0(FP), DI MOVQ R8, R9 SUBL $0x01, R9 JC emit_literal_done_match_emit_encodeBlockAsm14BAvx CMPL R9, $0x3c JLT one_byte_match_emit_encodeBlockAsm14BAvx CMPL R9, $0x00000100 JLT two_bytes_match_emit_encodeBlockAsm14BAvx CMPL R9, $0x00010000 JLT three_bytes_match_emit_encodeBlockAsm14BAvx CMPL R9, $0x01000000 JLT four_bytes_match_emit_encodeBlockAsm14BAvx MOVB $0xfc, (DI) MOVL R9, 1(DI) ADDQ $0x05, DI JMP memmove_match_emit_encodeBlockAsm14BAvx four_bytes_match_emit_encodeBlockAsm14BAvx: MOVQ R9, R10 SHRL $0x10, R10 MOVB $0xf8, (DI) MOVW R9, 1(DI) MOVB R10, 3(DI) ADDQ $0x04, DI JMP memmove_match_emit_encodeBlockAsm14BAvx three_bytes_match_emit_encodeBlockAsm14BAvx: MOVB $0xf4, (DI) MOVW R9, 1(DI) ADDQ $0x03, DI JMP memmove_match_emit_encodeBlockAsm14BAvx two_bytes_match_emit_encodeBlockAsm14BAvx: MOVB $0xf0, (DI) MOVB R9, 1(DI) ADDQ $0x02, DI JMP memmove_match_emit_encodeBlockAsm14BAvx one_byte_match_emit_encodeBlockAsm14BAvx: SHLB $0x02, R9 MOVB R9, (DI) ADDQ $0x01, DI memmove_match_emit_encodeBlockAsm14BAvx: LEAQ (DI)(R8*1), R9 NOP emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_tail: TESTQ R8, R8 JEQ emit_literal_done_match_emit_encodeBlockAsm14BAvx CMPQ R8, $0x02 JBE emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_1or2 CMPQ R8, $0x04 JB emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_3 JBE emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_4 CMPQ R8, $0x08 JB emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_5through7 JE emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_8 CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_9through16 CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_17through32 CMPQ R8, $0x40 JBE 
emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_33through64 CMPQ R8, $0x80 JBE emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_65through128 CMPQ R8, $0x00000100 JBE emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_129through256 JMP emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_avxUnaligned emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_1or2: MOVB (SI), R9 MOVB -1(SI)(R8*1), R10 MOVB R9, (DI) MOVB R10, -1(DI)(R8*1) JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_4: MOVL (SI), R9 MOVL R9, (DI) JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_3: MOVW (SI), R9 MOVB 2(SI), R10 MOVW R9, (DI) MOVB R10, 2(DI) JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_5through7: MOVL (SI), R9 MOVL -4(SI)(R8*1), R10 MOVL R9, (DI) MOVL R10, -4(DI)(R8*1) JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_8: MOVQ (SI), R9 MOVQ R9, (DI) JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_9through16: MOVQ (SI), R9 MOVQ -8(SI)(R8*1), R10 MOVQ R9, (DI) MOVQ R10, -8(DI)(R8*1) JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_17through32: MOVOU (SI), X0 MOVOU -16(SI)(R8*1), X1 MOVOU X0, (DI) MOVOU X1, -16(DI)(R8*1) JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_33through64: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU -32(SI)(R8*1), X2 MOVOU -16(SI)(R8*1), X3 MOVOU X0, (DI) MOVOU X1, 16(DI) MOVOU X2, -32(DI)(R8*1) MOVOU X3, -16(DI)(R8*1) JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_65through128: MOVOU (SI), 
X0 MOVOU 16(SI), X1 MOVOU 32(SI), X2 MOVOU 48(SI), X3 MOVOU -64(SI)(R8*1), X12 MOVOU -48(SI)(R8*1), X13 MOVOU -32(SI)(R8*1), X14 MOVOU -16(SI)(R8*1), X15 MOVOU X0, (DI) MOVOU X1, 16(DI) MOVOU X2, 32(DI) MOVOU X3, 48(DI) MOVOU X12, -64(DI)(R8*1) MOVOU X13, -48(DI)(R8*1) MOVOU X14, -32(DI)(R8*1) MOVOU X15, -16(DI)(R8*1) JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_129through256: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU 32(SI), X2 MOVOU 48(SI), X3 MOVOU 64(SI), X4 MOVOU 80(SI), X5 MOVOU 96(SI), X6 MOVOU 112(SI), X7 MOVOU -128(SI)(R8*1), X8 MOVOU -112(SI)(R8*1), X9 MOVOU -96(SI)(R8*1), X10 MOVOU -80(SI)(R8*1), X11 MOVOU -64(SI)(R8*1), X12 MOVOU -48(SI)(R8*1), X13 MOVOU -32(SI)(R8*1), X14 MOVOU -16(SI)(R8*1), X15 MOVOU X0, (DI) MOVOU X1, 16(DI) MOVOU X2, 32(DI) MOVOU X3, 48(DI) MOVOU X4, 64(DI) MOVOU X5, 80(DI) MOVOU X6, 96(DI) MOVOU X7, 112(DI) MOVOU X8, -128(DI)(R8*1) MOVOU X9, -112(DI)(R8*1) MOVOU X10, -96(DI)(R8*1) MOVOU X11, -80(DI)(R8*1) MOVOU X12, -64(DI)(R8*1) MOVOU X13, -48(DI)(R8*1) MOVOU X14, -32(DI)(R8*1) MOVOU X15, -16(DI)(R8*1) JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_256through2048: LEAQ -256(R8), R8 MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU 32(SI), X2 MOVOU 48(SI), X3 MOVOU 64(SI), X4 MOVOU 80(SI), X5 MOVOU 96(SI), X6 MOVOU 112(SI), X7 MOVOU 128(SI), X8 MOVOU 144(SI), X9 MOVOU 160(SI), X10 MOVOU 176(SI), X11 MOVOU 192(SI), X12 MOVOU 208(SI), X13 MOVOU 224(SI), X14 MOVOU 240(SI), X15 MOVOU X0, (DI) MOVOU X1, 16(DI) MOVOU X2, 32(DI) MOVOU X3, 48(DI) MOVOU X4, 64(DI) MOVOU X5, 80(DI) MOVOU X6, 96(DI) MOVOU X7, 112(DI) MOVOU X8, 128(DI) MOVOU X9, 144(DI) MOVOU X10, 160(DI) MOVOU X11, 176(DI) MOVOU X12, 192(DI) MOVOU X13, 208(DI) MOVOU X14, 224(DI) MOVOU X15, 240(DI) CMPQ R8, $0x00000100 LEAQ 256(SI), SI LEAQ 256(DI), DI JGE emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_256through2048 JMP 
emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_tail emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_avxUnaligned: LEAQ (SI)(R8*1), R10 MOVQ DI, R12 MOVOU -128(R10), X5 MOVOU -112(R10), X6 MOVQ $0x00000080, R9 ANDQ $0xffffffe0, DI ADDQ $0x20, DI MOVOU -96(R10), X7 MOVOU -80(R10), X8 MOVQ DI, R11 SUBQ R12, R11 MOVOU -64(R10), X9 MOVOU -48(R10), X10 SUBQ R11, R8 MOVOU -32(R10), X11 MOVOU -16(R10), X12 VMOVDQU (SI), Y4 ADDQ R11, SI SUBQ R9, R8 emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_gobble_128_loop: VMOVDQU (SI), Y0 VMOVDQU 32(SI), Y1 VMOVDQU 64(SI), Y2 VMOVDQU 96(SI), Y3 ADDQ R9, SI VMOVDQA Y0, (DI) VMOVDQA Y1, 32(DI) VMOVDQA Y2, 64(DI) VMOVDQA Y3, 96(DI) ADDQ R9, DI SUBQ R9, R8 JA emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_gobble_128_loop ADDQ R9, R8 ADDQ DI, R8 VMOVDQU Y4, (R12) VZEROUPPER MOVOU X5, -128(R8) MOVOU X6, -112(R8) MOVOU X7, -96(R8) MOVOU X8, -80(R8) MOVOU X9, -64(R8) MOVOU X10, -48(R8) MOVOU X11, -32(R8) MOVOU X12, -16(R8) JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx MOVQ R9, DI emit_literal_done_match_emit_encodeBlockAsm14BAvx: MOVQ DI, dst_base+0(FP) emit_literal_skip_match_emit_encodeBlockAsm14BAvx: NOP match_nolit_loop_encodeBlockAsm14BAvx: MOVL AX, SI MOVL AX, SI SUBL BX, SI MOVL SI, 24(SP) ADDL $0x04, AX ADDL $0x04, BX MOVL 16(SP), SI SUBL AX, SI XORQ R8, R8 CMPQ SI, $0x08 JL matchlen_single_match_nolit_encodeBlockAsm14BAvx matchlen_loopback_match_nolit_encodeBlockAsm14BAvx: MOVQ (CX)(R8*1), DI XORQ (CX)(R8*1), DI TESTQ DI, DI JZ matchlen_loop_match_nolit_encodeBlockAsm14BAvx BSFQ DI, DI SARQ $0x03, DI LEAQ (R8)(DI*1), R8 JMP match_nolit_end_encodeBlockAsm14BAvx matchlen_loop_match_nolit_encodeBlockAsm14BAvx: LEAQ -8(SI), SI LEAQ 8(R8), R8 CMPQ SI, $0x08 JGE matchlen_loopback_match_nolit_encodeBlockAsm14BAvx matchlen_single_match_nolit_encodeBlockAsm14BAvx: TESTQ SI, SI JZ match_nolit_end_encodeBlockAsm14BAvx matchlen_single_loopback_match_nolit_encodeBlockAsm14BAvx: MOVB (CX)(R8*1), DI 
CMPB (CX)(R8*1), DI JNE match_nolit_end_encodeBlockAsm14BAvx LEAQ 1(R8), R8 DECQ SI JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm14BAvx match_nolit_end_encodeBlockAsm14BAvx: MOVL 24(SP), SI ADDQ $0x04, R8 MOVQ dst_base+0(FP), DI ADDL R8, AX CMPL SI, $0x00010000 JL two_byte_offset_match_nolit_encodeBlockAsm14BAvx CMPL R8, $0x40 JLE four_bytes_remain_match_nolit_encodeBlockAsm14BAvx MOVB $0xff, (DI) MOVD SI, 1(DI) LEAQ -64(R8), R8 ADDQ $0x05, DI CMPL R8, $0x04 JL four_bytes_remain_match_nolit_encodeBlockAsm14BAvx emit_repeat_again_match_nolit_encodeBlockAsm14BAvx_emit_copy: MOVQ R8, R9 LEAQ -4(R8), R8 CMPL R9, $0x08 JLE repeat_two_match_nolit_encodeBlockAsm14BAvx_emit_copy CMPL R9, $0x0c JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm14BAvx_emit_copy CMPL SI, $0x00000800 JLT repeat_two_offset_match_nolit_encodeBlockAsm14BAvx_emit_copy cant_repeat_two_offset_match_nolit_encodeBlockAsm14BAvx_emit_copy: CMPL R8, $0x00000104 JLT repeat_three_match_nolit_encodeBlockAsm14BAvx_emit_copy CMPL R8, $0x00010100 JLT repeat_four_match_nolit_encodeBlockAsm14BAvx_emit_copy CMPL R8, $0x0100ffff JLT repeat_five_match_nolit_encodeBlockAsm14BAvx_emit_copy LEAQ -16842747(R8), R8 MOVW $0x001d, (DI) MOVW $0xfffb, 2(DI) MOVB $0xff, 4(DI) ADDQ $0x05, DI JMP emit_repeat_again_match_nolit_encodeBlockAsm14BAvx_emit_copy repeat_five_match_nolit_encodeBlockAsm14BAvx_emit_copy: LEAQ -65536(R8), R8 MOVQ R8, SI MOVW $0x001d, (DI) MOVW R8, 2(DI) SARQ $0x10, SI MOVB SI, 4(DI) ADDQ $0x05, DI JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx repeat_four_match_nolit_encodeBlockAsm14BAvx_emit_copy: LEAQ -256(R8), R8 MOVW $0x0019, (DI) MOVW R8, 2(DI) ADDQ $0x04, DI JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx repeat_three_match_nolit_encodeBlockAsm14BAvx_emit_copy: LEAQ -4(R8), R8 MOVW $0x0015, (DI) MOVB R8, 2(DI) ADDQ $0x03, DI JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx repeat_two_match_nolit_encodeBlockAsm14BAvx_emit_copy: SHLL $0x02, R8 ORL $0x01, R8 MOVW R8, (DI) ADDQ 
$0x02, DI JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx repeat_two_offset_match_nolit_encodeBlockAsm14BAvx_emit_copy: XORQ R9, R9 LEAQ 1(R9)(R8*4), R8 MOVB SI, 1(DI) SARL $0x08, SI SHLL $0x05, SI ORL SI, R8 MOVB R8, (DI) ADDQ $0x02, DI JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx four_bytes_remain_match_nolit_encodeBlockAsm14BAvx: TESTL R8, R8 JZ match_nolit_emitcopy_end_encodeBlockAsm14BAvx MOVB $0x03, DL LEAQ -4(DX)(R8*4), R8 MOVB R8, (DI) MOVD SI, 1(DI) ADDQ $0x05, DI JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx two_byte_offset_match_nolit_encodeBlockAsm14BAvx: CMPL R8, $0x40 JLE two_byte_offset_short_match_nolit_encodeBlockAsm14BAvx MOVB $0xee, (DI) MOVW SI, 1(DI) LEAQ -60(R8), R8 ADDQ $0x03, DI emit_repeat_again_match_nolit_encodeBlockAsm14BAvx_emit_copy_short: MOVQ R8, R9 LEAQ -4(R8), R8 CMPL R9, $0x08 JLE repeat_two_match_nolit_encodeBlockAsm14BAvx_emit_copy_short CMPL R9, $0x0c JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm14BAvx_emit_copy_short CMPL SI, $0x00000800 JLT repeat_two_offset_match_nolit_encodeBlockAsm14BAvx_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm14BAvx_emit_copy_short: CMPL R8, $0x00000104 JLT repeat_three_match_nolit_encodeBlockAsm14BAvx_emit_copy_short CMPL R8, $0x00010100 JLT repeat_four_match_nolit_encodeBlockAsm14BAvx_emit_copy_short CMPL R8, $0x0100ffff JLT repeat_five_match_nolit_encodeBlockAsm14BAvx_emit_copy_short LEAQ -16842747(R8), R8 MOVW $0x001d, (DI) MOVW $0xfffb, 2(DI) MOVB $0xff, 4(DI) ADDQ $0x05, DI JMP emit_repeat_again_match_nolit_encodeBlockAsm14BAvx_emit_copy_short repeat_five_match_nolit_encodeBlockAsm14BAvx_emit_copy_short: LEAQ -65536(R8), R8 MOVQ R8, SI MOVW $0x001d, (DI) MOVW R8, 2(DI) SARQ $0x10, SI MOVB SI, 4(DI) ADDQ $0x05, DI JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx repeat_four_match_nolit_encodeBlockAsm14BAvx_emit_copy_short: LEAQ -256(R8), R8 MOVW $0x0019, (DI) MOVW R8, 2(DI) ADDQ $0x04, DI JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx 
repeat_three_match_nolit_encodeBlockAsm14BAvx_emit_copy_short: LEAQ -4(R8), R8 MOVW $0x0015, (DI) MOVB R8, 2(DI) ADDQ $0x03, DI JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx repeat_two_match_nolit_encodeBlockAsm14BAvx_emit_copy_short: SHLL $0x02, R8 ORL $0x01, R8 MOVW R8, (DI) ADDQ $0x02, DI JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx repeat_two_offset_match_nolit_encodeBlockAsm14BAvx_emit_copy_short: XORQ R9, R9 LEAQ 1(R9)(R8*4), R8 MOVB SI, 1(DI) SARL $0x08, SI SHLL $0x05, SI ORL SI, R8 MOVB R8, (DI) ADDQ $0x02, DI JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx two_byte_offset_short_match_nolit_encodeBlockAsm14BAvx: CMPL R8, $0x0c JGE emit_copy_three_match_nolit_encodeBlockAsm14BAvx CMPL SI, $0x00000800 JGE emit_copy_three_match_nolit_encodeBlockAsm14BAvx MOVB $0x01, DL LEAQ -16(DX)(R8*4), R8 MOVB SI, 1(DI) SHRL $0x08, SI SHLL $0x05, SI ORL SI, R8 MOVB R8, (DI) ADDQ $0x02, DI JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx emit_copy_three_match_nolit_encodeBlockAsm14BAvx: MOVB $0x02, DL LEAQ -4(DX)(R8*4), R8 MOVB R8, (DI) MOVW SI, 1(DI) ADDQ $0x03, DI match_nolit_emitcopy_end_encodeBlockAsm14BAvx: MOVQ DI, dst_base+0(FP) MOVL AX, 20(SP) CMPL AX, 16(SP) JGE emit_remainder_encodeBlockAsm14BAvx CMPQ DI, (SP) JL match_nolit_dst_ok_encodeBlockAsm14BAvx MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeBlockAsm14BAvx: MOVQ -2(CX)(AX*1), SI MOVQ $0x0000cf1bbcdcbf9b, DI MOVQ SI, R8 SHRQ $0x10, SI MOVQ SI, R9 SHLQ $0x10, R8 IMULQ DI, R8 SHRQ $0x32, R8 SHLQ $0x10, R9 IMULQ DI, R9 SHRQ $0x32, R9 MOVL 32(SP)(R8*1), DI MOVL 32(SP)(R9*1), DI LEAQ -2(AX), DI MOVL DI, 32(SP)(R8*1) MOVL AX, 32(SP)(R9*1) CMPL (CX)(R9*1), SI JEQ match_nolit_loop_encodeBlockAsm14BAvx INCL AX JMP search_loop_encodeBlockAsm14BAvx emit_remainder_encodeBlockAsm14BAvx: MOVQ src_len+32(FP), AX SUBL 20(SP), AX MOVQ dst_base+0(FP), DX LEAQ (DX)(AX*1), DX CMPQ DX, (SP) JL emit_remainder_ok_encodeBlockAsm14BAvx MOVQ $0x00000000, ret+48(FP) RET emit_remainder_ok_encodeBlockAsm14BAvx: 
MOVQ src_len+32(FP), AX MOVL 20(SP), DX CMPL DX, AX JEQ emit_literal_skip_emit_remainder_encodeBlockAsm14BAvx MOVL AX, BX MOVL AX, 20(SP) LEAQ (CX)(DX*1), AX SUBL DX, BX MOVQ dst_base+0(FP), CX MOVQ BX, DX SUBL $0x01, DX JC emit_literal_done_emit_remainder_encodeBlockAsm14BAvx CMPL DX, $0x3c JLT one_byte_emit_remainder_encodeBlockAsm14BAvx CMPL DX, $0x00000100 JLT two_bytes_emit_remainder_encodeBlockAsm14BAvx CMPL DX, $0x00010000 JLT three_bytes_emit_remainder_encodeBlockAsm14BAvx CMPL DX, $0x01000000 JLT four_bytes_emit_remainder_encodeBlockAsm14BAvx MOVB $0xfc, (CX) MOVL DX, 1(CX) ADDQ $0x05, CX JMP memmove_emit_remainder_encodeBlockAsm14BAvx four_bytes_emit_remainder_encodeBlockAsm14BAvx: MOVQ DX, SI SHRL $0x10, SI MOVB $0xf8, (CX) MOVW DX, 1(CX) MOVB SI, 3(CX) ADDQ $0x04, CX JMP memmove_emit_remainder_encodeBlockAsm14BAvx three_bytes_emit_remainder_encodeBlockAsm14BAvx: MOVB $0xf4, (CX) MOVW DX, 1(CX) ADDQ $0x03, CX JMP memmove_emit_remainder_encodeBlockAsm14BAvx two_bytes_emit_remainder_encodeBlockAsm14BAvx: MOVB $0xf0, (CX) MOVB DL, 1(CX) ADDQ $0x02, CX JMP memmove_emit_remainder_encodeBlockAsm14BAvx one_byte_emit_remainder_encodeBlockAsm14BAvx: SHLB $0x02, DL MOVB DL, (CX) ADDQ $0x01, CX memmove_emit_remainder_encodeBlockAsm14BAvx: LEAQ (CX)(BX*1), DX NOP emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_tail: TESTQ BX, BX JEQ emit_literal_done_emit_remainder_encodeBlockAsm14BAvx CMPQ BX, $0x02 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_1or2 CMPQ BX, $0x04 JB emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_3 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_4 CMPQ BX, $0x08 JB emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_5through7 JE emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_8 CMPQ BX, $0x10 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_9through16 CMPQ BX, $0x20 JBE 
emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_17through32 CMPQ BX, $0x40 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_33through64 CMPQ BX, $0x80 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_65through128 CMPQ BX, $0x00000100 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_129through256 JMP emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_avxUnaligned emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_1or2: MOVB (AX), DL MOVB -1(AX)(BX*1), SI MOVB DL, (CX) MOVB SI, -1(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_4: MOVL (AX), DX MOVL DX, (CX) JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_3: MOVW (AX), DX MOVB 2(AX), SI MOVW DX, (CX) MOVB SI, 2(CX) JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_5through7: MOVL (AX), DX MOVL -4(AX)(BX*1), SI MOVL DX, (CX) MOVL SI, -4(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_8: MOVQ (AX), DX MOVQ DX, (CX) JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_9through16: MOVQ (AX), DX MOVQ -8(AX)(BX*1), SI MOVQ DX, (CX) MOVQ SI, -8(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_17through32: MOVOU (AX), X0 MOVOU -16(AX)(BX*1), X1 MOVOU X0, (CX) MOVOU X1, -16(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_33through64: MOVOU (AX), X0 MOVOU 16(AX), X1 MOVOU -32(AX)(BX*1), X2 MOVOU -16(AX)(BX*1), X3 MOVOU X0, (CX) MOVOU X1, 16(CX) MOVOU X2, -32(CX)(BX*1) 
MOVOU X3, -16(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_65through128: MOVOU (AX), X0 MOVOU 16(AX), X1 MOVOU 32(AX), X2 MOVOU 48(AX), X3 MOVOU -64(AX)(BX*1), X12 MOVOU -48(AX)(BX*1), X13 MOVOU -32(AX)(BX*1), X14 MOVOU -16(AX)(BX*1), X15 MOVOU X0, (CX) MOVOU X1, 16(CX) MOVOU X2, 32(CX) MOVOU X3, 48(CX) MOVOU X12, -64(CX)(BX*1) MOVOU X13, -48(CX)(BX*1) MOVOU X14, -32(CX)(BX*1) MOVOU X15, -16(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_129through256: MOVOU (AX), X0 MOVOU 16(AX), X1 MOVOU 32(AX), X2 MOVOU 48(AX), X3 MOVOU 64(AX), X4 MOVOU 80(AX), X5 MOVOU 96(AX), X6 MOVOU 112(AX), X7 MOVOU -128(AX)(BX*1), X8 MOVOU -112(AX)(BX*1), X9 MOVOU -96(AX)(BX*1), X10 MOVOU -80(AX)(BX*1), X11 MOVOU -64(AX)(BX*1), X12 MOVOU -48(AX)(BX*1), X13 MOVOU -32(AX)(BX*1), X14 MOVOU -16(AX)(BX*1), X15 MOVOU X0, (CX) MOVOU X1, 16(CX) MOVOU X2, 32(CX) MOVOU X3, 48(CX) MOVOU X4, 64(CX) MOVOU X5, 80(CX) MOVOU X6, 96(CX) MOVOU X7, 112(CX) MOVOU X8, -128(CX)(BX*1) MOVOU X9, -112(CX)(BX*1) MOVOU X10, -96(CX)(BX*1) MOVOU X11, -80(CX)(BX*1) MOVOU X12, -64(CX)(BX*1) MOVOU X13, -48(CX)(BX*1) MOVOU X14, -32(CX)(BX*1) MOVOU X15, -16(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_256through2048: LEAQ -256(BX), BX MOVOU (AX), X0 MOVOU 16(AX), X1 MOVOU 32(AX), X2 MOVOU 48(AX), X3 MOVOU 64(AX), X4 MOVOU 80(AX), X5 MOVOU 96(AX), X6 MOVOU 112(AX), X7 MOVOU 128(AX), X8 MOVOU 144(AX), X9 MOVOU 160(AX), X10 MOVOU 176(AX), X11 MOVOU 192(AX), X12 MOVOU 208(AX), X13 MOVOU 224(AX), X14 MOVOU 240(AX), X15 MOVOU X0, (CX) MOVOU X1, 16(CX) MOVOU X2, 32(CX) MOVOU X3, 48(CX) MOVOU X4, 64(CX) MOVOU X5, 80(CX) MOVOU X6, 96(CX) MOVOU X7, 112(CX) MOVOU X8, 128(CX) MOVOU X9, 144(CX) MOVOU X10, 160(CX) MOVOU X11, 176(CX) MOVOU X12, 192(CX) MOVOU X13, 208(CX) MOVOU 
X14, 224(CX) MOVOU X15, 240(CX) CMPQ BX, $0x00000100 LEAQ 256(AX), AX LEAQ 256(CX), CX JGE emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_256through2048 JMP emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_tail emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_avxUnaligned: LEAQ (AX)(BX*1), SI MOVQ CX, R8 MOVOU -128(SI), X5 MOVOU -112(SI), X6 MOVQ $0x00000080, DX ANDQ $0xffffffe0, CX ADDQ $0x20, CX MOVOU -96(SI), X7 MOVOU -80(SI), X8 MOVQ CX, DI SUBQ R8, DI MOVOU -64(SI), X9 MOVOU -48(SI), X10 SUBQ DI, BX MOVOU -32(SI), X11 MOVOU -16(SI), X12 VMOVDQU (AX), Y4 ADDQ DI, AX SUBQ DX, BX emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_gobble_128_loop: VMOVDQU (AX), Y0 VMOVDQU 32(AX), Y1 VMOVDQU 64(AX), Y2 VMOVDQU 96(AX), Y3 ADDQ DX, AX VMOVDQA Y0, (CX) VMOVDQA Y1, 32(CX) VMOVDQA Y2, 64(CX) VMOVDQA Y3, 96(CX) ADDQ DX, CX SUBQ DX, BX JA emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_gobble_128_loop ADDQ DX, BX ADDQ CX, BX VMOVDQU Y4, (R8) VZEROUPPER MOVOU X5, -128(BX) MOVOU X6, -112(BX) MOVOU X7, -96(BX) MOVOU X8, -80(BX) MOVOU X9, -64(BX) MOVOU X10, -48(BX) MOVOU X11, -32(BX) MOVOU X12, -16(BX) JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx MOVQ DX, CX emit_literal_done_emit_remainder_encodeBlockAsm14BAvx: MOVQ CX, dst_base+0(FP) emit_literal_skip_emit_remainder_encodeBlockAsm14BAvx: MOVQ 8(SP), AX SUBQ dst_base+0(FP), AX MOVQ AX, ret+48(FP) RET // func encodeBlockAsm12BAvx(dst []byte, src []byte) int // Requires: AVX, SSE2 TEXT ·encodeBlockAsm12BAvx(SB), $4128-56 MOVQ $0x00000020, AX LEAQ 32(SP), CX PXOR X0, X0 zero_loop_encodeBlockAsm12BAvx: MOVOU X0, (CX) MOVOU X0, 16(CX) MOVOU X0, 32(CX) MOVOU X0, 48(CX) MOVOU X0, 64(CX) MOVOU X0, 80(CX) MOVOU X0, 96(CX) MOVOU X0, 112(CX) ADDQ $0x80, CX DECQ AX JNZ zero_loop_encodeBlockAsm12BAvx MOVL AX, 20(SP) MOVQ src_len+32(FP), AX LEAQ -5(AX), CX LEAQ -8(AX), BX SHRQ $0x05, AX SUBL AX, CX MOVL BX, 16(SP) MOVQ dst_base+0(FP), AX MOVQ AX, 8(SP) LEAQ 
(AX)(CX*1), CX MOVQ CX, (SP) MOVL $0x00000001, AX MOVL AX, 24(SP) MOVQ src_base+24(FP), CX search_loop_encodeBlockAsm12BAvx: MOVQ (CX)(AX*1), SI MOVL AX, BX SUBL 20(SP), BX SHRL $0x04, BX LEAQ 4(AX)(BX*1), BX MOVL 16(SP), DI CMPL BX, DI JGT emit_remainder_encodeBlockAsm12BAvx MOVL BX, 28(SP) MOVQ $0x0000cf1bbcdcbf9b, BX MOVQ SI, R8 MOVQ SI, R9 SHRQ $0x08, R9 SHLQ $0x10, R8 IMULQ BX, R8 SHRQ $0x34, R8 SHLQ $0x10, R9 IMULQ BX, R9 SHRQ $0x34, R9 MOVL 32(SP)(R8*1), BX MOVL 32(SP)(R9*1), DI MOVL AX, 32(SP)(R8*1) LEAL 1(AX), R8 MOVL R8, 32(SP)(R9*1) MOVL AX, R8 SUBL 24(SP), R8 MOVL 1(CX)(R8*1), R10 MOVQ SI, R9 SHLQ $0x08, R9 CMPL R9, R10 JNE no_repeat_found_encodeBlockAsm12BAvx LEAQ 1(AX), SI MOVL 20(SP), BX TESTL R8, R8 JZ repeat_extend_back_end_encodeBlockAsm12BAvx repeat_extend_back_loop_encodeBlockAsm12BAvx: CMPL SI, BX JG repeat_extend_back_end_encodeBlockAsm12BAvx MOVB -1(CX)(R8*1), DL MOVB -1(CX)(SI*1), DI CMPB DL, DI JNE repeat_extend_back_end_encodeBlockAsm12BAvx LEAQ -1(SI), SI DECL R8 JZ repeat_extend_back_end_encodeBlockAsm12BAvx JMP repeat_extend_back_loop_encodeBlockAsm12BAvx repeat_extend_back_end_encodeBlockAsm12BAvx: MOVL 20(SP), BX CMPL BX, SI JEQ emit_literal_skip_repeat_emit_encodeBlockAsm12BAvx MOVL SI, DI MOVL SI, 20(SP) LEAQ (CX)(BX*1), R8 SUBL BX, DI MOVQ dst_base+0(FP), BX MOVQ DI, R9 SUBL $0x01, R9 JC emit_literal_done_repeat_emit_encodeBlockAsm12BAvx CMPL R9, $0x3c JLT one_byte_repeat_emit_encodeBlockAsm12BAvx CMPL R9, $0x00000100 JLT two_bytes_repeat_emit_encodeBlockAsm12BAvx CMPL R9, $0x00010000 JLT three_bytes_repeat_emit_encodeBlockAsm12BAvx CMPL R9, $0x01000000 JLT four_bytes_repeat_emit_encodeBlockAsm12BAvx MOVB $0xfc, (BX) MOVL R9, 1(BX) ADDQ $0x05, BX JMP memmove_repeat_emit_encodeBlockAsm12BAvx four_bytes_repeat_emit_encodeBlockAsm12BAvx: MOVQ R9, R10 SHRL $0x10, R10 MOVB $0xf8, (BX) MOVW R9, 1(BX) MOVB R10, 3(BX) ADDQ $0x04, BX JMP memmove_repeat_emit_encodeBlockAsm12BAvx three_bytes_repeat_emit_encodeBlockAsm12BAvx: MOVB $0xf4, (BX) 
MOVW R9, 1(BX) ADDQ $0x03, BX JMP memmove_repeat_emit_encodeBlockAsm12BAvx two_bytes_repeat_emit_encodeBlockAsm12BAvx: MOVB $0xf0, (BX) MOVB R9, 1(BX) ADDQ $0x02, BX JMP memmove_repeat_emit_encodeBlockAsm12BAvx one_byte_repeat_emit_encodeBlockAsm12BAvx: SHLB $0x02, R9 MOVB R9, (BX) ADDQ $0x01, BX memmove_repeat_emit_encodeBlockAsm12BAvx: LEAQ (BX)(DI*1), R9 NOP emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_tail: TESTQ DI, DI JEQ emit_literal_done_repeat_emit_encodeBlockAsm12BAvx CMPQ DI, $0x02 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_1or2 CMPQ DI, $0x04 JB emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_3 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_4 CMPQ DI, $0x08 JB emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_5through7 JE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_8 CMPQ DI, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_9through16 CMPQ DI, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_17through32 CMPQ DI, $0x40 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_33through64 CMPQ DI, $0x80 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_65through128 CMPQ DI, $0x00000100 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_129through256 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_avxUnaligned emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_1or2: MOVB (R8), R9 MOVB -1(R8)(DI*1), R10 MOVB R9, (BX) MOVB R10, -1(BX)(DI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_4: MOVL (R8), R9 MOVL R9, (BX) JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_3: MOVW (R8), R9 MOVB 2(R8), R10 MOVW R9, (BX) MOVB R10, 2(BX) JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx 
emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_5through7: MOVL (R8), R9 MOVL -4(R8)(DI*1), R10 MOVL R9, (BX) MOVL R10, -4(BX)(DI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_8: MOVQ (R8), R9 MOVQ R9, (BX) JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_9through16: MOVQ (R8), R9 MOVQ -8(R8)(DI*1), R10 MOVQ R9, (BX) MOVQ R10, -8(BX)(DI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_17through32: MOVOU (R8), X0 MOVOU -16(R8)(DI*1), X1 MOVOU X0, (BX) MOVOU X1, -16(BX)(DI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_33through64: MOVOU (R8), X0 MOVOU 16(R8), X1 MOVOU -32(R8)(DI*1), X2 MOVOU -16(R8)(DI*1), X3 MOVOU X0, (BX) MOVOU X1, 16(BX) MOVOU X2, -32(BX)(DI*1) MOVOU X3, -16(BX)(DI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_65through128: MOVOU (R8), X0 MOVOU 16(R8), X1 MOVOU 32(R8), X2 MOVOU 48(R8), X3 MOVOU -64(R8)(DI*1), X12 MOVOU -48(R8)(DI*1), X13 MOVOU -32(R8)(DI*1), X14 MOVOU -16(R8)(DI*1), X15 MOVOU X0, (BX) MOVOU X1, 16(BX) MOVOU X2, 32(BX) MOVOU X3, 48(BX) MOVOU X12, -64(BX)(DI*1) MOVOU X13, -48(BX)(DI*1) MOVOU X14, -32(BX)(DI*1) MOVOU X15, -16(BX)(DI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_129through256: MOVOU (R8), X0 MOVOU 16(R8), X1 MOVOU 32(R8), X2 MOVOU 48(R8), X3 MOVOU 64(R8), X4 MOVOU 80(R8), X5 MOVOU 96(R8), X6 MOVOU 112(R8), X7 MOVOU -128(R8)(DI*1), X8 MOVOU -112(R8)(DI*1), X9 MOVOU -96(R8)(DI*1), X10 MOVOU -80(R8)(DI*1), X11 MOVOU -64(R8)(DI*1), X12 MOVOU -48(R8)(DI*1), X13 MOVOU -32(R8)(DI*1), X14 MOVOU -16(R8)(DI*1), X15 MOVOU X0, (BX) MOVOU X1, 16(BX) MOVOU X2, 32(BX) MOVOU X3, 48(BX) 
MOVOU X4, 64(BX) MOVOU X5, 80(BX) MOVOU X6, 96(BX) MOVOU X7, 112(BX) MOVOU X8, -128(BX)(DI*1) MOVOU X9, -112(BX)(DI*1) MOVOU X10, -96(BX)(DI*1) MOVOU X11, -80(BX)(DI*1) MOVOU X12, -64(BX)(DI*1) MOVOU X13, -48(BX)(DI*1) MOVOU X14, -32(BX)(DI*1) MOVOU X15, -16(BX)(DI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_256through2048: LEAQ -256(DI), DI MOVOU (R8), X0 MOVOU 16(R8), X1 MOVOU 32(R8), X2 MOVOU 48(R8), X3 MOVOU 64(R8), X4 MOVOU 80(R8), X5 MOVOU 96(R8), X6 MOVOU 112(R8), X7 MOVOU 128(R8), X8 MOVOU 144(R8), X9 MOVOU 160(R8), X10 MOVOU 176(R8), X11 MOVOU 192(R8), X12 MOVOU 208(R8), X13 MOVOU 224(R8), X14 MOVOU 240(R8), X15 MOVOU X0, (BX) MOVOU X1, 16(BX) MOVOU X2, 32(BX) MOVOU X3, 48(BX) MOVOU X4, 64(BX) MOVOU X5, 80(BX) MOVOU X6, 96(BX) MOVOU X7, 112(BX) MOVOU X8, 128(BX) MOVOU X9, 144(BX) MOVOU X10, 160(BX) MOVOU X11, 176(BX) MOVOU X12, 192(BX) MOVOU X13, 208(BX) MOVOU X14, 224(BX) MOVOU X15, 240(BX) CMPQ DI, $0x00000100 LEAQ 256(R8), R8 LEAQ 256(BX), BX JGE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_256through2048 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_tail emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_avxUnaligned: LEAQ (R8)(DI*1), R10 MOVQ BX, R12 MOVOU -128(R10), X5 MOVOU -112(R10), X6 MOVQ $0x00000080, R9 ANDQ $0xffffffe0, BX ADDQ $0x20, BX MOVOU -96(R10), X7 MOVOU -80(R10), X8 MOVQ BX, R11 SUBQ R12, R11 MOVOU -64(R10), X9 MOVOU -48(R10), X10 SUBQ R11, DI MOVOU -32(R10), X11 MOVOU -16(R10), X12 VMOVDQU (R8), Y4 ADDQ R11, R8 SUBQ R9, DI emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_gobble_128_loop: VMOVDQU (R8), Y0 VMOVDQU 32(R8), Y1 VMOVDQU 64(R8), Y2 VMOVDQU 96(R8), Y3 ADDQ R9, R8 VMOVDQA Y0, (BX) VMOVDQA Y1, 32(BX) VMOVDQA Y2, 64(BX) VMOVDQA Y3, 96(BX) ADDQ R9, BX SUBQ R9, DI JA emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_gobble_128_loop ADDQ R9, DI ADDQ BX, DI VMOVDQU Y4, (R12) VZEROUPPER MOVOU X5, 
-128(DI) MOVOU X6, -112(DI) MOVOU X7, -96(DI) MOVOU X8, -80(DI) MOVOU X9, -64(DI) MOVOU X10, -48(DI) MOVOU X11, -32(DI) MOVOU X12, -16(DI) JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx MOVQ R9, BX emit_literal_done_repeat_emit_encodeBlockAsm12BAvx: MOVQ BX, dst_base+0(FP) emit_literal_skip_repeat_emit_encodeBlockAsm12BAvx: ADDL $0x05, AX MOVL AX, BX SUBL 24(SP), BX MOVL 16(SP), BX SUBL AX, BX XORQ R8, R8 CMPQ BX, $0x08 JL matchlen_single_repeat_extend matchlen_loopback_repeat_extend: MOVQ (CX)(R8*1), DI XORQ (CX)(R8*1), DI TESTQ DI, DI JZ matchlen_loop_repeat_extend BSFQ DI, DI SARQ $0x03, DI LEAQ (R8)(DI*1), R8 JMP repeat_extend_forward_end_encodeBlockAsm12BAvx matchlen_loop_repeat_extend: LEAQ -8(BX), BX LEAQ 8(R8), R8 CMPQ BX, $0x08 JGE matchlen_loopback_repeat_extend matchlen_single_repeat_extend: TESTQ BX, BX JZ repeat_extend_forward_end_encodeBlockAsm12BAvx matchlen_single_loopback_repeat_extend: MOVB (CX)(R8*1), DI CMPB (CX)(R8*1), DI JNE repeat_extend_forward_end_encodeBlockAsm12BAvx LEAQ 1(R8), R8 DECQ BX JNZ matchlen_single_loopback_repeat_extend repeat_extend_forward_end_encodeBlockAsm12BAvx: ADDL R8, AX MOVL AX, BX SUBL SI, BX MOVL 24(SP), SI MOVQ dst_base+0(FP), DI MOVL 20(SP), R8 TESTL R8, R8 JZ repeat_as_copy_encodeBlockAsm12BAvx emit_repeat_again_match_repeat_: MOVQ BX, R8 LEAQ -4(BX), BX CMPL R8, $0x08 JLE repeat_two_match_repeat_ CMPL R8, $0x0c JGE cant_repeat_two_offset_match_repeat_ CMPL SI, $0x00000800 JLT repeat_two_offset_match_repeat_ cant_repeat_two_offset_match_repeat_: CMPL BX, $0x00000104 JLT repeat_three_match_repeat_ CMPL BX, $0x00010100 JLT repeat_four_match_repeat_ CMPL BX, $0x0100ffff JLT repeat_five_match_repeat_ LEAQ -16842747(BX), BX MOVW $0x001d, (DI) MOVW $0xfffb, 2(DI) MOVB $0xff, 4(DI) ADDQ $0x05, DI JMP emit_repeat_again_match_repeat_ repeat_five_match_repeat_: LEAQ -65536(BX), BX MOVQ BX, SI MOVW $0x001d, (DI) MOVW BX, 2(DI) SARQ $0x10, SI MOVB SI, 4(DI) ADDQ $0x05, DI JMP repeat_end_emit_encodeBlockAsm12BAvx 
repeat_four_match_repeat_: LEAQ -256(BX), BX MOVW $0x0019, (DI) MOVW BX, 2(DI) ADDQ $0x04, DI JMP repeat_end_emit_encodeBlockAsm12BAvx repeat_three_match_repeat_: LEAQ -4(BX), BX MOVW $0x0015, (DI) MOVB BL, 2(DI) ADDQ $0x03, DI JMP repeat_end_emit_encodeBlockAsm12BAvx repeat_two_match_repeat_: SHLL $0x02, BX ORL $0x01, BX MOVW BX, (DI) ADDQ $0x02, DI JMP repeat_end_emit_encodeBlockAsm12BAvx repeat_two_offset_match_repeat_: XORQ R8, R8 LEAQ 1(R8)(BX*4), BX MOVB SI, 1(DI) SARL $0x08, SI SHLL $0x05, SI ORL SI, BX MOVB BL, (DI) ADDQ $0x02, DI JMP repeat_end_emit_encodeBlockAsm12BAvx repeat_as_copy_encodeBlockAsm12BAvx: CMPL SI, $0x00010000 JL two_byte_offset_repeat_as_copy_encodeBlockAsm12BAvx CMPL BX, $0x40 JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm12BAvx MOVB $0xff, (DI) MOVD SI, 1(DI) LEAQ -64(BX), BX ADDQ $0x05, DI CMPL BX, $0x04 JL four_bytes_remain_repeat_as_copy_encodeBlockAsm12BAvx emit_repeat_again_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy: MOVQ BX, R8 LEAQ -4(BX), BX CMPL R8, $0x08 JLE repeat_two_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy CMPL R8, $0x0c JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy CMPL SI, $0x00000800 JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy: CMPL BX, $0x00000104 JLT repeat_three_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy CMPL BX, $0x00010100 JLT repeat_four_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy CMPL BX, $0x0100ffff JLT repeat_five_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy LEAQ -16842747(BX), BX MOVW $0x001d, (DI) MOVW $0xfffb, 2(DI) MOVB $0xff, 4(DI) ADDQ $0x05, DI JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy repeat_five_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy: LEAQ -65536(BX), BX MOVQ BX, SI MOVW $0x001d, (DI) MOVW BX, 2(DI) SARQ $0x10, SI MOVB SI, 4(DI) ADDQ $0x05, DI JMP repeat_end_emit_encodeBlockAsm12BAvx 
repeat_four_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy: LEAQ -256(BX), BX MOVW $0x0019, (DI) MOVW BX, 2(DI) ADDQ $0x04, DI JMP repeat_end_emit_encodeBlockAsm12BAvx repeat_three_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy: LEAQ -4(BX), BX MOVW $0x0015, (DI) MOVB BL, 2(DI) ADDQ $0x03, DI JMP repeat_end_emit_encodeBlockAsm12BAvx repeat_two_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy: SHLL $0x02, BX ORL $0x01, BX MOVW BX, (DI) ADDQ $0x02, DI JMP repeat_end_emit_encodeBlockAsm12BAvx repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy: XORQ R8, R8 LEAQ 1(R8)(BX*4), BX MOVB SI, 1(DI) SARL $0x08, SI SHLL $0x05, SI ORL SI, BX MOVB BL, (DI) ADDQ $0x02, DI JMP repeat_end_emit_encodeBlockAsm12BAvx four_bytes_remain_repeat_as_copy_encodeBlockAsm12BAvx: TESTL BX, BX JZ repeat_end_emit_encodeBlockAsm12BAvx MOVB $0x03, DL LEAQ -4(DX)(BX*4), BX MOVB BL, (DI) MOVD SI, 1(DI) ADDQ $0x05, DI JMP repeat_end_emit_encodeBlockAsm12BAvx two_byte_offset_repeat_as_copy_encodeBlockAsm12BAvx: CMPL BX, $0x40 JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm12BAvx MOVB $0xee, (DI) MOVW SI, 1(DI) LEAQ -60(BX), BX ADDQ $0x03, DI emit_repeat_again_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short: MOVQ BX, R8 LEAQ -4(BX), BX CMPL R8, $0x08 JLE repeat_two_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short CMPL R8, $0x0c JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short CMPL SI, $0x00000800 JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short: CMPL BX, $0x00000104 JLT repeat_three_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short CMPL BX, $0x00010100 JLT repeat_four_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short CMPL BX, $0x0100ffff JLT repeat_five_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short LEAQ -16842747(BX), BX MOVW $0x001d, (DI) MOVW $0xfffb, 2(DI) MOVB $0xff, 4(DI) ADDQ $0x05, DI JMP 
emit_repeat_again_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short repeat_five_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short: LEAQ -65536(BX), BX MOVQ BX, SI MOVW $0x001d, (DI) MOVW BX, 2(DI) SARQ $0x10, SI MOVB SI, 4(DI) ADDQ $0x05, DI JMP repeat_end_emit_encodeBlockAsm12BAvx repeat_four_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short: LEAQ -256(BX), BX MOVW $0x0019, (DI) MOVW BX, 2(DI) ADDQ $0x04, DI JMP repeat_end_emit_encodeBlockAsm12BAvx repeat_three_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short: LEAQ -4(BX), BX MOVW $0x0015, (DI) MOVB BL, 2(DI) ADDQ $0x03, DI JMP repeat_end_emit_encodeBlockAsm12BAvx repeat_two_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short: SHLL $0x02, BX ORL $0x01, BX MOVW BX, (DI) ADDQ $0x02, DI JMP repeat_end_emit_encodeBlockAsm12BAvx repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short: XORQ R8, R8 LEAQ 1(R8)(BX*4), BX MOVB SI, 1(DI) SARL $0x08, SI SHLL $0x05, SI ORL SI, BX MOVB BL, (DI) ADDQ $0x02, DI JMP repeat_end_emit_encodeBlockAsm12BAvx two_byte_offset_short_repeat_as_copy_encodeBlockAsm12BAvx: CMPL BX, $0x0c JGE emit_copy_three_repeat_as_copy_encodeBlockAsm12BAvx CMPL SI, $0x00000800 JGE emit_copy_three_repeat_as_copy_encodeBlockAsm12BAvx MOVB $0x01, DL LEAQ -16(DX)(BX*4), BX MOVB SI, 1(DI) SHRL $0x08, SI SHLL $0x05, SI ORL SI, BX MOVB BL, (DI) ADDQ $0x02, DI JMP repeat_end_emit_encodeBlockAsm12BAvx emit_copy_three_repeat_as_copy_encodeBlockAsm12BAvx: MOVB $0x02, DL LEAQ -4(DX)(BX*4), BX MOVB BL, (DI) MOVW SI, 1(DI) ADDQ $0x03, DI repeat_end_emit_encodeBlockAsm12BAvx: MOVQ DI, dst_base+0(FP) MOVL 16(SP), BX CMPL AX, BX JGT emit_remainder_encodeBlockAsm12BAvx JMP search_loop_encodeBlockAsm12BAvx no_repeat_found_encodeBlockAsm12BAvx: MOVQ $0x0000cf1bbcdcbf9b, R9 MOVQ SI, R8 SHRQ $0x10, R8 SHLQ $0x10, R8 IMULQ R9, R8 SHRQ $0x34, R8 CMPL (CX)(BX*1), SI SHRQ $0x08, SI JEQ candidate_match_encodeBlockAsm12BAvx MOVL 32(SP)(R8*1), BX CMPL (CX)(DI*1), SI JEQ candidate2_match_encodeBlockAsm12BAvx LEAQ 
2(AX), DI MOVL DI, 32(SP)(R8*1) SHRQ $0x08, SI CMPL (CX)(BX*1), SI JEQ candidate3_match_encodeBlockAsm12BAvx MOVL 28(SP), AX JMP search_loop_encodeBlockAsm12BAvx candidate3_match_encodeBlockAsm12BAvx: ADDL $0x02, AX JMP candidate_match_encodeBlockAsm12BAvx candidate2_match_encodeBlockAsm12BAvx: LEAQ -2(AX), BX MOVL BX, 32(SP)(R8*1) INCL AX MOVL DI, BX candidate_match_encodeBlockAsm12BAvx: MOVL 20(SP), SI TESTL BX, BX JZ match_extend_back_end_encodeBlockAsm12BAvx match_extend_back_loop_encodeBlockAsm12BAvx: CMPL AX, SI JG match_extend_back_end_encodeBlockAsm12BAvx MOVB -1(CX)(BX*1), DL MOVB -1(CX)(AX*1), DI CMPB DL, DI JNE match_extend_back_end_encodeBlockAsm12BAvx LEAL -1(AX), AX DECL BX JZ match_extend_back_end_encodeBlockAsm12BAvx JMP match_extend_back_loop_encodeBlockAsm12BAvx match_extend_back_end_encodeBlockAsm12BAvx: MOVL AX, SI SUBL 20(SP), SI LEAQ dst_base+0(FP)(SI*1), SI CMPQ SI, (SP) JL match_dst_size_check_encodeBlockAsm12BAvx MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeBlockAsm12BAvx: MOVL BX, SI MOVL 20(SP), DI CMPL DI, SI JEQ emit_literal_skip_match_emit_encodeBlockAsm12BAvx MOVL SI, R8 MOVL SI, 20(SP) LEAQ (CX)(DI*1), SI SUBL DI, R8 MOVQ dst_base+0(FP), DI MOVQ R8, R9 SUBL $0x01, R9 JC emit_literal_done_match_emit_encodeBlockAsm12BAvx CMPL R9, $0x3c JLT one_byte_match_emit_encodeBlockAsm12BAvx CMPL R9, $0x00000100 JLT two_bytes_match_emit_encodeBlockAsm12BAvx CMPL R9, $0x00010000 JLT three_bytes_match_emit_encodeBlockAsm12BAvx CMPL R9, $0x01000000 JLT four_bytes_match_emit_encodeBlockAsm12BAvx MOVB $0xfc, (DI) MOVL R9, 1(DI) ADDQ $0x05, DI JMP memmove_match_emit_encodeBlockAsm12BAvx four_bytes_match_emit_encodeBlockAsm12BAvx: MOVQ R9, R10 SHRL $0x10, R10 MOVB $0xf8, (DI) MOVW R9, 1(DI) MOVB R10, 3(DI) ADDQ $0x04, DI JMP memmove_match_emit_encodeBlockAsm12BAvx three_bytes_match_emit_encodeBlockAsm12BAvx: MOVB $0xf4, (DI) MOVW R9, 1(DI) ADDQ $0x03, DI JMP memmove_match_emit_encodeBlockAsm12BAvx two_bytes_match_emit_encodeBlockAsm12BAvx: 
MOVB $0xf0, (DI) MOVB R9, 1(DI) ADDQ $0x02, DI JMP memmove_match_emit_encodeBlockAsm12BAvx one_byte_match_emit_encodeBlockAsm12BAvx: SHLB $0x02, R9 MOVB R9, (DI) ADDQ $0x01, DI memmove_match_emit_encodeBlockAsm12BAvx: LEAQ (DI)(R8*1), R9 NOP emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_tail: TESTQ R8, R8 JEQ emit_literal_done_match_emit_encodeBlockAsm12BAvx CMPQ R8, $0x02 JBE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_1or2 CMPQ R8, $0x04 JB emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_3 JBE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_4 CMPQ R8, $0x08 JB emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_5through7 JE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_8 CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_9through16 CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_17through32 CMPQ R8, $0x40 JBE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_33through64 CMPQ R8, $0x80 JBE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_65through128 CMPQ R8, $0x00000100 JBE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_129through256 JMP emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_avxUnaligned emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_1or2: MOVB (SI), R9 MOVB -1(SI)(R8*1), R10 MOVB R9, (DI) MOVB R10, -1(DI)(R8*1) JMP emit_literal_done_match_emit_encodeBlockAsm12BAvx emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_4: MOVL (SI), R9 MOVL R9, (DI) JMP emit_literal_done_match_emit_encodeBlockAsm12BAvx emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_3: MOVW (SI), R9 MOVB 2(SI), R10 MOVW R9, (DI) MOVB R10, 2(DI) JMP emit_literal_done_match_emit_encodeBlockAsm12BAvx emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_5through7: MOVL (SI), R9 MOVL -4(SI)(R8*1), R10 MOVL R9, (DI) MOVL R10, -4(DI)(R8*1) JMP 
emit_literal_done_match_emit_encodeBlockAsm12BAvx emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_8: MOVQ (SI), R9 MOVQ R9, (DI) JMP emit_literal_done_match_emit_encodeBlockAsm12BAvx emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_9through16: MOVQ (SI), R9 MOVQ -8(SI)(R8*1), R10 MOVQ R9, (DI) MOVQ R10, -8(DI)(R8*1) JMP emit_literal_done_match_emit_encodeBlockAsm12BAvx emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_17through32: MOVOU (SI), X0 MOVOU -16(SI)(R8*1), X1 MOVOU X0, (DI) MOVOU X1, -16(DI)(R8*1) JMP emit_literal_done_match_emit_encodeBlockAsm12BAvx emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_33through64: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU -32(SI)(R8*1), X2 MOVOU -16(SI)(R8*1), X3 MOVOU X0, (DI) MOVOU X1, 16(DI) MOVOU X2, -32(DI)(R8*1) MOVOU X3, -16(DI)(R8*1) JMP emit_literal_done_match_emit_encodeBlockAsm12BAvx emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_65through128: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU 32(SI), X2 MOVOU 48(SI), X3 MOVOU -64(SI)(R8*1), X12 MOVOU -48(SI)(R8*1), X13 MOVOU -32(SI)(R8*1), X14 MOVOU -16(SI)(R8*1), X15 MOVOU X0, (DI) MOVOU X1, 16(DI) MOVOU X2, 32(DI) MOVOU X3, 48(DI) MOVOU X12, -64(DI)(R8*1) MOVOU X13, -48(DI)(R8*1) MOVOU X14, -32(DI)(R8*1) MOVOU X15, -16(DI)(R8*1) JMP emit_literal_done_match_emit_encodeBlockAsm12BAvx emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_129through256: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU 32(SI), X2 MOVOU 48(SI), X3 MOVOU 64(SI), X4 MOVOU 80(SI), X5 MOVOU 96(SI), X6 MOVOU 112(SI), X7 MOVOU -128(SI)(R8*1), X8 MOVOU -112(SI)(R8*1), X9 MOVOU -96(SI)(R8*1), X10 MOVOU -80(SI)(R8*1), X11 MOVOU -64(SI)(R8*1), X12 MOVOU -48(SI)(R8*1), X13 MOVOU -32(SI)(R8*1), X14 MOVOU -16(SI)(R8*1), X15 MOVOU X0, (DI) MOVOU X1, 16(DI) MOVOU X2, 32(DI) MOVOU X3, 48(DI) MOVOU X4, 64(DI) MOVOU X5, 80(DI) MOVOU X6, 96(DI) MOVOU X7, 112(DI) MOVOU X8, -128(DI)(R8*1) MOVOU X9, -112(DI)(R8*1) MOVOU X10, -96(DI)(R8*1) MOVOU X11, 
-80(DI)(R8*1) MOVOU X12, -64(DI)(R8*1) MOVOU X13, -48(DI)(R8*1) MOVOU X14, -32(DI)(R8*1) MOVOU X15, -16(DI)(R8*1) JMP emit_literal_done_match_emit_encodeBlockAsm12BAvx emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_256through2048: LEAQ -256(R8), R8 MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU 32(SI), X2 MOVOU 48(SI), X3 MOVOU 64(SI), X4 MOVOU 80(SI), X5 MOVOU 96(SI), X6 MOVOU 112(SI), X7 MOVOU 128(SI), X8 MOVOU 144(SI), X9 MOVOU 160(SI), X10 MOVOU 176(SI), X11 MOVOU 192(SI), X12 MOVOU 208(SI), X13 MOVOU 224(SI), X14 MOVOU 240(SI), X15 MOVOU X0, (DI) MOVOU X1, 16(DI) MOVOU X2, 32(DI) MOVOU X3, 48(DI) MOVOU X4, 64(DI) MOVOU X5, 80(DI) MOVOU X6, 96(DI) MOVOU X7, 112(DI) MOVOU X8, 128(DI) MOVOU X9, 144(DI) MOVOU X10, 160(DI) MOVOU X11, 176(DI) MOVOU X12, 192(DI) MOVOU X13, 208(DI) MOVOU X14, 224(DI) MOVOU X15, 240(DI) CMPQ R8, $0x00000100 LEAQ 256(SI), SI LEAQ 256(DI), DI JGE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_256through2048 JMP emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_tail emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_avxUnaligned: LEAQ (SI)(R8*1), R10 MOVQ DI, R12 MOVOU -128(R10), X5 MOVOU -112(R10), X6 MOVQ $0x00000080, R9 ANDQ $0xffffffe0, DI ADDQ $0x20, DI MOVOU -96(R10), X7 MOVOU -80(R10), X8 MOVQ DI, R11 SUBQ R12, R11 MOVOU -64(R10), X9 MOVOU -48(R10), X10 SUBQ R11, R8 MOVOU -32(R10), X11 MOVOU -16(R10), X12 VMOVDQU (SI), Y4 ADDQ R11, SI SUBQ R9, R8 emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_gobble_128_loop: VMOVDQU (SI), Y0 VMOVDQU 32(SI), Y1 VMOVDQU 64(SI), Y2 VMOVDQU 96(SI), Y3 ADDQ R9, SI VMOVDQA Y0, (DI) VMOVDQA Y1, 32(DI) VMOVDQA Y2, 64(DI) VMOVDQA Y3, 96(DI) ADDQ R9, DI SUBQ R9, R8 JA emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_gobble_128_loop ADDQ R9, R8 ADDQ DI, R8 VMOVDQU Y4, (R12) VZEROUPPER MOVOU X5, -128(R8) MOVOU X6, -112(R8) MOVOU X7, -96(R8) MOVOU X8, -80(R8) MOVOU X9, -64(R8) MOVOU X10, -48(R8) MOVOU X11, -32(R8) MOVOU X12, -16(R8) JMP 
emit_literal_done_match_emit_encodeBlockAsm12BAvx MOVQ R9, DI emit_literal_done_match_emit_encodeBlockAsm12BAvx: MOVQ DI, dst_base+0(FP) emit_literal_skip_match_emit_encodeBlockAsm12BAvx: NOP match_nolit_loop_encodeBlockAsm12BAvx: MOVL AX, SI MOVL AX, SI SUBL BX, SI MOVL SI, 24(SP) ADDL $0x04, AX ADDL $0x04, BX MOVL 16(SP), SI SUBL AX, SI XORQ R8, R8 CMPQ SI, $0x08 JL matchlen_single_match_nolit_encodeBlockAsm12BAvx matchlen_loopback_match_nolit_encodeBlockAsm12BAvx: MOVQ (CX)(R8*1), DI XORQ (CX)(R8*1), DI TESTQ DI, DI JZ matchlen_loop_match_nolit_encodeBlockAsm12BAvx BSFQ DI, DI SARQ $0x03, DI LEAQ (R8)(DI*1), R8 JMP match_nolit_end_encodeBlockAsm12BAvx matchlen_loop_match_nolit_encodeBlockAsm12BAvx: LEAQ -8(SI), SI LEAQ 8(R8), R8 CMPQ SI, $0x08 JGE matchlen_loopback_match_nolit_encodeBlockAsm12BAvx matchlen_single_match_nolit_encodeBlockAsm12BAvx: TESTQ SI, SI JZ match_nolit_end_encodeBlockAsm12BAvx matchlen_single_loopback_match_nolit_encodeBlockAsm12BAvx: MOVB (CX)(R8*1), DI CMPB (CX)(R8*1), DI JNE match_nolit_end_encodeBlockAsm12BAvx LEAQ 1(R8), R8 DECQ SI JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm12BAvx match_nolit_end_encodeBlockAsm12BAvx: MOVL 24(SP), SI ADDQ $0x04, R8 MOVQ dst_base+0(FP), DI ADDL R8, AX CMPL SI, $0x00010000 JL two_byte_offset_match_nolit_encodeBlockAsm12BAvx CMPL R8, $0x40 JLE four_bytes_remain_match_nolit_encodeBlockAsm12BAvx MOVB $0xff, (DI) MOVD SI, 1(DI) LEAQ -64(R8), R8 ADDQ $0x05, DI CMPL R8, $0x04 JL four_bytes_remain_match_nolit_encodeBlockAsm12BAvx emit_repeat_again_match_nolit_encodeBlockAsm12BAvx_emit_copy: MOVQ R8, R9 LEAQ -4(R8), R8 CMPL R9, $0x08 JLE repeat_two_match_nolit_encodeBlockAsm12BAvx_emit_copy CMPL R9, $0x0c JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy CMPL SI, $0x00000800 JLT repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy cant_repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy: CMPL R8, $0x00000104 JLT 
repeat_three_match_nolit_encodeBlockAsm12BAvx_emit_copy CMPL R8, $0x00010100 JLT repeat_four_match_nolit_encodeBlockAsm12BAvx_emit_copy CMPL R8, $0x0100ffff JLT repeat_five_match_nolit_encodeBlockAsm12BAvx_emit_copy LEAQ -16842747(R8), R8 MOVW $0x001d, (DI) MOVW $0xfffb, 2(DI) MOVB $0xff, 4(DI) ADDQ $0x05, DI JMP emit_repeat_again_match_nolit_encodeBlockAsm12BAvx_emit_copy repeat_five_match_nolit_encodeBlockAsm12BAvx_emit_copy: LEAQ -65536(R8), R8 MOVQ R8, SI MOVW $0x001d, (DI) MOVW R8, 2(DI) SARQ $0x10, SI MOVB SI, 4(DI) ADDQ $0x05, DI JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx repeat_four_match_nolit_encodeBlockAsm12BAvx_emit_copy: LEAQ -256(R8), R8 MOVW $0x0019, (DI) MOVW R8, 2(DI) ADDQ $0x04, DI JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx repeat_three_match_nolit_encodeBlockAsm12BAvx_emit_copy: LEAQ -4(R8), R8 MOVW $0x0015, (DI) MOVB R8, 2(DI) ADDQ $0x03, DI JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx repeat_two_match_nolit_encodeBlockAsm12BAvx_emit_copy: SHLL $0x02, R8 ORL $0x01, R8 MOVW R8, (DI) ADDQ $0x02, DI JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy: XORQ R9, R9 LEAQ 1(R9)(R8*4), R8 MOVB SI, 1(DI) SARL $0x08, SI SHLL $0x05, SI ORL SI, R8 MOVB R8, (DI) ADDQ $0x02, DI JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx four_bytes_remain_match_nolit_encodeBlockAsm12BAvx: TESTL R8, R8 JZ match_nolit_emitcopy_end_encodeBlockAsm12BAvx MOVB $0x03, DL LEAQ -4(DX)(R8*4), R8 MOVB R8, (DI) MOVD SI, 1(DI) ADDQ $0x05, DI JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx two_byte_offset_match_nolit_encodeBlockAsm12BAvx: CMPL R8, $0x40 JLE two_byte_offset_short_match_nolit_encodeBlockAsm12BAvx MOVB $0xee, (DI) MOVW SI, 1(DI) LEAQ -60(R8), R8 ADDQ $0x03, DI emit_repeat_again_match_nolit_encodeBlockAsm12BAvx_emit_copy_short: MOVQ R8, R9 LEAQ -4(R8), R8 CMPL R9, $0x08 JLE repeat_two_match_nolit_encodeBlockAsm12BAvx_emit_copy_short CMPL R9, $0x0c JGE 
cant_repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy_short CMPL SI, $0x00000800 JLT repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy_short: CMPL R8, $0x00000104 JLT repeat_three_match_nolit_encodeBlockAsm12BAvx_emit_copy_short CMPL R8, $0x00010100 JLT repeat_four_match_nolit_encodeBlockAsm12BAvx_emit_copy_short CMPL R8, $0x0100ffff JLT repeat_five_match_nolit_encodeBlockAsm12BAvx_emit_copy_short LEAQ -16842747(R8), R8 MOVW $0x001d, (DI) MOVW $0xfffb, 2(DI) MOVB $0xff, 4(DI) ADDQ $0x05, DI JMP emit_repeat_again_match_nolit_encodeBlockAsm12BAvx_emit_copy_short repeat_five_match_nolit_encodeBlockAsm12BAvx_emit_copy_short: LEAQ -65536(R8), R8 MOVQ R8, SI MOVW $0x001d, (DI) MOVW R8, 2(DI) SARQ $0x10, SI MOVB SI, 4(DI) ADDQ $0x05, DI JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx repeat_four_match_nolit_encodeBlockAsm12BAvx_emit_copy_short: LEAQ -256(R8), R8 MOVW $0x0019, (DI) MOVW R8, 2(DI) ADDQ $0x04, DI JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx repeat_three_match_nolit_encodeBlockAsm12BAvx_emit_copy_short: LEAQ -4(R8), R8 MOVW $0x0015, (DI) MOVB R8, 2(DI) ADDQ $0x03, DI JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx repeat_two_match_nolit_encodeBlockAsm12BAvx_emit_copy_short: SHLL $0x02, R8 ORL $0x01, R8 MOVW R8, (DI) ADDQ $0x02, DI JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy_short: XORQ R9, R9 LEAQ 1(R9)(R8*4), R8 MOVB SI, 1(DI) SARL $0x08, SI SHLL $0x05, SI ORL SI, R8 MOVB R8, (DI) ADDQ $0x02, DI JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx two_byte_offset_short_match_nolit_encodeBlockAsm12BAvx: CMPL R8, $0x0c JGE emit_copy_three_match_nolit_encodeBlockAsm12BAvx CMPL SI, $0x00000800 JGE emit_copy_three_match_nolit_encodeBlockAsm12BAvx MOVB $0x01, DL LEAQ -16(DX)(R8*4), R8 MOVB SI, 1(DI) SHRL $0x08, SI SHLL $0x05, SI ORL SI, R8 MOVB R8, (DI) ADDQ $0x02, DI JMP 
match_nolit_emitcopy_end_encodeBlockAsm12BAvx emit_copy_three_match_nolit_encodeBlockAsm12BAvx: MOVB $0x02, DL LEAQ -4(DX)(R8*4), R8 MOVB R8, (DI) MOVW SI, 1(DI) ADDQ $0x03, DI match_nolit_emitcopy_end_encodeBlockAsm12BAvx: MOVQ DI, dst_base+0(FP) MOVL AX, 20(SP) CMPL AX, 16(SP) JGE emit_remainder_encodeBlockAsm12BAvx CMPQ DI, (SP) JL match_nolit_dst_ok_encodeBlockAsm12BAvx MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeBlockAsm12BAvx: MOVQ -2(CX)(AX*1), SI MOVQ $0x0000cf1bbcdcbf9b, DI MOVQ SI, R8 SHRQ $0x10, SI MOVQ SI, R9 SHLQ $0x10, R8 IMULQ DI, R8 SHRQ $0x34, R8 SHLQ $0x10, R9 IMULQ DI, R9 SHRQ $0x34, R9 MOVL 32(SP)(R8*1), DI MOVL 32(SP)(R9*1), DI LEAQ -2(AX), DI MOVL DI, 32(SP)(R8*1) MOVL AX, 32(SP)(R9*1) CMPL (CX)(R9*1), SI JEQ match_nolit_loop_encodeBlockAsm12BAvx INCL AX JMP search_loop_encodeBlockAsm12BAvx emit_remainder_encodeBlockAsm12BAvx: MOVQ src_len+32(FP), AX SUBL 20(SP), AX MOVQ dst_base+0(FP), DX LEAQ (DX)(AX*1), DX CMPQ DX, (SP) JL emit_remainder_ok_encodeBlockAsm12BAvx MOVQ $0x00000000, ret+48(FP) RET emit_remainder_ok_encodeBlockAsm12BAvx: MOVQ src_len+32(FP), AX MOVL 20(SP), DX CMPL DX, AX JEQ emit_literal_skip_emit_remainder_encodeBlockAsm12BAvx MOVL AX, BX MOVL AX, 20(SP) LEAQ (CX)(DX*1), AX SUBL DX, BX MOVQ dst_base+0(FP), CX MOVQ BX, DX SUBL $0x01, DX JC emit_literal_done_emit_remainder_encodeBlockAsm12BAvx CMPL DX, $0x3c JLT one_byte_emit_remainder_encodeBlockAsm12BAvx CMPL DX, $0x00000100 JLT two_bytes_emit_remainder_encodeBlockAsm12BAvx CMPL DX, $0x00010000 JLT three_bytes_emit_remainder_encodeBlockAsm12BAvx CMPL DX, $0x01000000 JLT four_bytes_emit_remainder_encodeBlockAsm12BAvx MOVB $0xfc, (CX) MOVL DX, 1(CX) ADDQ $0x05, CX JMP memmove_emit_remainder_encodeBlockAsm12BAvx four_bytes_emit_remainder_encodeBlockAsm12BAvx: MOVQ DX, SI SHRL $0x10, SI MOVB $0xf8, (CX) MOVW DX, 1(CX) MOVB SI, 3(CX) ADDQ $0x04, CX JMP memmove_emit_remainder_encodeBlockAsm12BAvx three_bytes_emit_remainder_encodeBlockAsm12BAvx: MOVB $0xf4, (CX) MOVW 
DX, 1(CX) ADDQ $0x03, CX JMP memmove_emit_remainder_encodeBlockAsm12BAvx two_bytes_emit_remainder_encodeBlockAsm12BAvx: MOVB $0xf0, (CX) MOVB DL, 1(CX) ADDQ $0x02, CX JMP memmove_emit_remainder_encodeBlockAsm12BAvx one_byte_emit_remainder_encodeBlockAsm12BAvx: SHLB $0x02, DL MOVB DL, (CX) ADDQ $0x01, CX memmove_emit_remainder_encodeBlockAsm12BAvx: LEAQ (CX)(BX*1), DX NOP emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_tail: TESTQ BX, BX JEQ emit_literal_done_emit_remainder_encodeBlockAsm12BAvx CMPQ BX, $0x02 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_1or2 CMPQ BX, $0x04 JB emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_3 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_4 CMPQ BX, $0x08 JB emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_5through7 JE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_8 CMPQ BX, $0x10 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_9through16 CMPQ BX, $0x20 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_17through32 CMPQ BX, $0x40 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_33through64 CMPQ BX, $0x80 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_65through128 CMPQ BX, $0x00000100 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_129through256 JMP emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_avxUnaligned emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_1or2: MOVB (AX), DL MOVB -1(AX)(BX*1), SI MOVB DL, (CX) MOVB SI, -1(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm12BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_4: MOVL (AX), DX MOVL DX, (CX) JMP emit_literal_done_emit_remainder_encodeBlockAsm12BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_3: MOVW (AX), DX MOVB 2(AX), SI MOVW DX, (CX) MOVB SI, 2(CX) JMP 
emit_literal_done_emit_remainder_encodeBlockAsm12BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_5through7: MOVL (AX), DX MOVL -4(AX)(BX*1), SI MOVL DX, (CX) MOVL SI, -4(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm12BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_8: MOVQ (AX), DX MOVQ DX, (CX) JMP emit_literal_done_emit_remainder_encodeBlockAsm12BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_9through16: MOVQ (AX), DX MOVQ -8(AX)(BX*1), SI MOVQ DX, (CX) MOVQ SI, -8(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm12BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_17through32: MOVOU (AX), X0 MOVOU -16(AX)(BX*1), X1 MOVOU X0, (CX) MOVOU X1, -16(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm12BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_33through64: MOVOU (AX), X0 MOVOU 16(AX), X1 MOVOU -32(AX)(BX*1), X2 MOVOU -16(AX)(BX*1), X3 MOVOU X0, (CX) MOVOU X1, 16(CX) MOVOU X2, -32(CX)(BX*1) MOVOU X3, -16(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm12BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_65through128: MOVOU (AX), X0 MOVOU 16(AX), X1 MOVOU 32(AX), X2 MOVOU 48(AX), X3 MOVOU -64(AX)(BX*1), X12 MOVOU -48(AX)(BX*1), X13 MOVOU -32(AX)(BX*1), X14 MOVOU -16(AX)(BX*1), X15 MOVOU X0, (CX) MOVOU X1, 16(CX) MOVOU X2, 32(CX) MOVOU X3, 48(CX) MOVOU X12, -64(CX)(BX*1) MOVOU X13, -48(CX)(BX*1) MOVOU X14, -32(CX)(BX*1) MOVOU X15, -16(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm12BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_129through256: MOVOU (AX), X0 MOVOU 16(AX), X1 MOVOU 32(AX), X2 MOVOU 48(AX), X3 MOVOU 64(AX), X4 MOVOU 80(AX), X5 MOVOU 96(AX), X6 MOVOU 112(AX), X7 MOVOU -128(AX)(BX*1), X8 MOVOU -112(AX)(BX*1), X9 MOVOU -96(AX)(BX*1), X10 MOVOU -80(AX)(BX*1), X11 MOVOU -64(AX)(BX*1), X12 MOVOU -48(AX)(BX*1), X13 MOVOU -32(AX)(BX*1), X14 
MOVOU -16(AX)(BX*1), X15 MOVOU X0, (CX) MOVOU X1, 16(CX) MOVOU X2, 32(CX) MOVOU X3, 48(CX) MOVOU X4, 64(CX) MOVOU X5, 80(CX) MOVOU X6, 96(CX) MOVOU X7, 112(CX) MOVOU X8, -128(CX)(BX*1) MOVOU X9, -112(CX)(BX*1) MOVOU X10, -96(CX)(BX*1) MOVOU X11, -80(CX)(BX*1) MOVOU X12, -64(CX)(BX*1) MOVOU X13, -48(CX)(BX*1) MOVOU X14, -32(CX)(BX*1) MOVOU X15, -16(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm12BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_256through2048: LEAQ -256(BX), BX MOVOU (AX), X0 MOVOU 16(AX), X1 MOVOU 32(AX), X2 MOVOU 48(AX), X3 MOVOU 64(AX), X4 MOVOU 80(AX), X5 MOVOU 96(AX), X6 MOVOU 112(AX), X7 MOVOU 128(AX), X8 MOVOU 144(AX), X9 MOVOU 160(AX), X10 MOVOU 176(AX), X11 MOVOU 192(AX), X12 MOVOU 208(AX), X13 MOVOU 224(AX), X14 MOVOU 240(AX), X15 MOVOU X0, (CX) MOVOU X1, 16(CX) MOVOU X2, 32(CX) MOVOU X3, 48(CX) MOVOU X4, 64(CX) MOVOU X5, 80(CX) MOVOU X6, 96(CX) MOVOU X7, 112(CX) MOVOU X8, 128(CX) MOVOU X9, 144(CX) MOVOU X10, 160(CX) MOVOU X11, 176(CX) MOVOU X12, 192(CX) MOVOU X13, 208(CX) MOVOU X14, 224(CX) MOVOU X15, 240(CX) CMPQ BX, $0x00000100 LEAQ 256(AX), AX LEAQ 256(CX), CX JGE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_256through2048 JMP emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_tail emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_avxUnaligned: LEAQ (AX)(BX*1), SI MOVQ CX, R8 MOVOU -128(SI), X5 MOVOU -112(SI), X6 MOVQ $0x00000080, DX ANDQ $0xffffffe0, CX ADDQ $0x20, CX MOVOU -96(SI), X7 MOVOU -80(SI), X8 MOVQ CX, DI SUBQ R8, DI MOVOU -64(SI), X9 MOVOU -48(SI), X10 SUBQ DI, BX MOVOU -32(SI), X11 MOVOU -16(SI), X12 VMOVDQU (AX), Y4 ADDQ DI, AX SUBQ DX, BX emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_gobble_128_loop: VMOVDQU (AX), Y0 VMOVDQU 32(AX), Y1 VMOVDQU 64(AX), Y2 VMOVDQU 96(AX), Y3 ADDQ DX, AX VMOVDQA Y0, (CX) VMOVDQA Y1, 32(CX) VMOVDQA Y2, 64(CX) VMOVDQA Y3, 96(CX) ADDQ DX, CX SUBQ DX, BX JA 
emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_gobble_128_loop ADDQ DX, BX ADDQ CX, BX VMOVDQU Y4, (R8) VZEROUPPER MOVOU X5, -128(BX) MOVOU X6, -112(BX) MOVOU X7, -96(BX) MOVOU X8, -80(BX) MOVOU X9, -64(BX) MOVOU X10, -48(BX) MOVOU X11, -32(BX) MOVOU X12, -16(BX) JMP emit_literal_done_emit_remainder_encodeBlockAsm12BAvx MOVQ DX, CX emit_literal_done_emit_remainder_encodeBlockAsm12BAvx: MOVQ CX, dst_base+0(FP) emit_literal_skip_emit_remainder_encodeBlockAsm12BAvx: MOVQ 8(SP), AX SUBQ dst_base+0(FP), AX MOVQ AX, ret+48(FP) RET

// func emitLiteral(dst []byte, lit []byte) int
// Requires: SSE2
//
// emitLiteral writes a length header for lit at the start of dst, copies the
// bytes of lit directly after it and returns header size + len(lit).
// An empty lit writes nothing and returns 0. dst is never bounds-checked.
//
// Header layout, selected by n = len(lit)-1 (32-bit, signed compares):
//   n < 60       1 byte : n<<2
//   n < 1<<8     2 bytes: 0xf0, n
//   n < 1<<16    3 bytes: 0xf4, n as 16-bit little endian
//   n < 1<<24    4 bytes: 0xf8, n as 24-bit little endian
//   otherwise    5 bytes: 0xfc, n as 32-bit little endian
//
// Registers: AX = write cursor in dst, CX = read cursor in lit,
// DX = bytes left to copy, BX = return value, SI/DI = scratch.
TEXT ·emitLiteral(SB), NOSPLIT, $0-56
	MOVQ dst_base+0(FP), AX
	MOVQ lit_base+24(FP), CX
	MOVQ lit_len+32(FP), DX
	MOVQ DX, BX
	MOVQ DX, SI

	// SI = len(lit)-1; the borrow (carry) means len(lit) == 0.
	SUBL $0x01, SI
	JC   emit_literal_end_standalone
	CMPL SI, $0x3c
	JLT  one_byte_standalone
	CMPL SI, $0x00000100
	JLT  two_bytes_standalone
	CMPL SI, $0x00010000
	JLT  three_bytes_standalone
	CMPL SI, $0x01000000
	JLT  four_bytes_standalone

	// 5-byte header.
	MOVB $0xfc, (AX)
	MOVL SI, 1(AX)
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	JMP  memmove_standalone

four_bytes_standalone:
	// 4-byte header: low 16 bits via MOVW, bits 16..23 via DI.
	MOVQ SI, DI
	SHRL $0x10, DI
	MOVB $0xf8, (AX)
	MOVW SI, 1(AX)
	MOVB DI, 3(AX)
	ADDQ $0x04, BX
	ADDQ $0x04, AX
	JMP  memmove_standalone

three_bytes_standalone:
	MOVB $0xf4, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX
	JMP  memmove_standalone

two_bytes_standalone:
	MOVB $0xf0, (AX)
	MOVB SI, 1(AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP  memmove_standalone

one_byte_standalone:
	SHLB $0x02, SI
	MOVB SI, (AX)
	ADDQ $0x01, BX
	ADDQ $0x01, AX

memmove_standalone:
	NOP

	// Copy DX bytes from (CX) to (AX). Every size class loads all of its
	// source data before storing; classes up to 256 bytes cover the range
	// with a head part and a (possibly overlapping) tail part.
emit_lit_memmove_standalone_memmove_tail:
	TESTQ DX, DX
	JEQ   emit_literal_end_standalone
	CMPQ  DX, $0x02
	JBE   emit_lit_memmove_standalone_memmove_move_1or2
	CMPQ  DX, $0x04
	JB    emit_lit_memmove_standalone_memmove_move_3
	JBE   emit_lit_memmove_standalone_memmove_move_4
	CMPQ  DX, $0x08
	JB    emit_lit_memmove_standalone_memmove_move_5through7
	JE    emit_lit_memmove_standalone_memmove_move_8
	CMPQ  DX, $0x10
	JBE   emit_lit_memmove_standalone_memmove_move_9through16
	CMPQ  DX, $0x20
	JBE   emit_lit_memmove_standalone_memmove_move_17through32
	CMPQ  DX, $0x40
	JBE   emit_lit_memmove_standalone_memmove_move_33through64
	CMPQ  DX, $0x80
	JBE   emit_lit_memmove_standalone_memmove_move_65through128
	CMPQ  DX, $0x00000100
	JBE   emit_lit_memmove_standalone_memmove_move_129through256
	JMP   emit_lit_memmove_standalone_memmove_move_256through2048

emit_lit_memmove_standalone_memmove_move_1or2:
	// CL/CX is reused as scratch below: the source pointer is dead after
	// the last load of each of these small classes.
	MOVB (CX), SI
	MOVB -1(CX)(DX*1), CL
	MOVB SI, (AX)
	MOVB CL, -1(AX)(DX*1)
	JMP  emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_4:
	MOVL (CX), SI
	MOVL SI, (AX)
	JMP  emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_3:
	MOVW (CX), SI
	MOVB 2(CX), CL
	MOVW SI, (AX)
	MOVB CL, 2(AX)
	JMP  emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_5through7:
	MOVL (CX), SI
	MOVL -4(CX)(DX*1), CX
	MOVL SI, (AX)
	MOVL CX, -4(AX)(DX*1)
	JMP  emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_8:
	MOVQ (CX), SI
	MOVQ SI, (AX)
	JMP  emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_9through16:
	MOVQ (CX), SI
	MOVQ -8(CX)(DX*1), CX
	MOVQ SI, (AX)
	MOVQ CX, -8(AX)(DX*1)
	JMP  emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_17through32:
	MOVOU (CX), X0
	MOVOU -16(CX)(DX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(DX*1)
	JMP   emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(DX*1), X2
	MOVOU -16(CX)(DX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(DX*1)
	MOVOU X3, -16(AX)(DX*1)
	JMP   emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_65through128:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU 32(CX), X2
	MOVOU 48(CX), X3
	MOVOU -64(CX)(DX*1), X12
	MOVOU -48(CX)(DX*1), X13
	MOVOU -32(CX)(DX*1), X14
	MOVOU -16(CX)(DX*1), X15
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, 32(AX)
	MOVOU X3, 48(AX)
	MOVOU X12, -64(AX)(DX*1)
	MOVOU X13, -48(AX)(DX*1)
	MOVOU X14, -32(AX)(DX*1)
	MOVOU X15, -16(AX)(DX*1)
	JMP   emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_129through256:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU 32(CX), X2
	MOVOU 48(CX), X3
	MOVOU 64(CX), X4
	MOVOU 80(CX), X5
	MOVOU 96(CX), X6
	MOVOU 112(CX), X7
	MOVOU -128(CX)(DX*1), X8
	MOVOU -112(CX)(DX*1), X9
	MOVOU -96(CX)(DX*1), X10
	MOVOU -80(CX)(DX*1), X11
	MOVOU -64(CX)(DX*1), X12
	MOVOU -48(CX)(DX*1), X13
	MOVOU -32(CX)(DX*1), X14
	MOVOU -16(CX)(DX*1), X15
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, 32(AX)
	MOVOU X3, 48(AX)
	MOVOU X4, 64(AX)
	MOVOU X5, 80(AX)
	MOVOU X6, 96(AX)
	MOVOU X7, 112(AX)
	MOVOU X8, -128(AX)(DX*1)
	MOVOU X9, -112(AX)(DX*1)
	MOVOU X10, -96(AX)(DX*1)
	MOVOU X11, -80(AX)(DX*1)
	MOVOU X12, -64(AX)(DX*1)
	MOVOU X13, -48(AX)(DX*1)
	MOVOU X14, -32(AX)(DX*1)
	MOVOU X15, -16(AX)(DX*1)
	JMP   emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_256through2048:
	// Move 256 bytes per iteration while at least 256 remain, then hand
	// the rest back to the size dispatch above. LEAQ is used for the
	// pointer bumps so the flags from CMPQ survive until JGE.
	LEAQ  -256(DX), DX
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU 32(CX), X2
	MOVOU 48(CX), X3
	MOVOU 64(CX), X4
	MOVOU 80(CX), X5
	MOVOU 96(CX), X6
	MOVOU 112(CX), X7
	MOVOU 128(CX), X8
	MOVOU 144(CX), X9
	MOVOU 160(CX), X10
	MOVOU 176(CX), X11
	MOVOU 192(CX), X12
	MOVOU 208(CX), X13
	MOVOU 224(CX), X14
	MOVOU 240(CX), X15
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, 32(AX)
	MOVOU X3, 48(AX)
	MOVOU X4, 64(AX)
	MOVOU X5, 80(AX)
	MOVOU X6, 96(AX)
	MOVOU X7, 112(AX)
	MOVOU X8, 128(AX)
	MOVOU X9, 144(AX)
	MOVOU X10, 160(AX)
	MOVOU X11, 176(AX)
	MOVOU X12, 192(AX)
	MOVOU X13, 208(AX)
	MOVOU X14, 224(AX)
	MOVOU X15, 240(AX)
	CMPQ  DX, $0x00000100
	LEAQ  256(CX), CX
	LEAQ  256(AX), AX
	JGE   emit_lit_memmove_standalone_memmove_move_256through2048
	JMP   emit_lit_memmove_standalone_memmove_tail

emit_literal_end_standalone:
	MOVQ BX, ret+48(FP)
	RET

// func emitLiteralAvx(dst []byte, lit []byte) int
// Requires: AVX, SSE2
//
// emitLiteralAvx has the same contract and header layout as emitLiteral
// (header for len(lit)-1, then the lit bytes; returns header size + len(lit),
// or 0 for an empty lit; dst is never bounds-checked). It differs only in the
// copy of payloads longer than 256 bytes, which uses 32-byte AVX moves.
//
// The SSE 256-bytes-per-iteration loop present in emitLiteral is not included
// here: no branch in this function can reach it, because every length above
// 256 is sent to the AVX path.
//
// Registers: AX = write cursor in dst, CX = read cursor in lit,
// DX = bytes left to copy, BX = return value, SI/DI/R8/R9 = scratch.
TEXT ·emitLiteralAvx(SB), NOSPLIT, $0-56
	MOVQ dst_base+0(FP), AX
	MOVQ lit_base+24(FP), CX
	MOVQ lit_len+32(FP), DX
	MOVQ DX, BX
	MOVQ DX, SI

	// SI = len(lit)-1; the borrow (carry) means len(lit) == 0.
	SUBL $0x01, SI
	JC   emit_literal_end_avx_standalone
	CMPL SI, $0x3c
	JLT  one_byte_standalone
	CMPL SI, $0x00000100
	JLT  two_bytes_standalone
	CMPL SI, $0x00010000
	JLT  three_bytes_standalone
	CMPL SI, $0x01000000
	JLT  four_bytes_standalone

	// 5-byte header.
	MOVB $0xfc, (AX)
	MOVL SI, 1(AX)
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	JMP  memmove_standalone

four_bytes_standalone:
	MOVQ SI, DI
	SHRL $0x10, DI
	MOVB $0xf8, (AX)
	MOVW SI, 1(AX)
	MOVB DI, 3(AX)
	ADDQ $0x04, BX
	ADDQ $0x04, AX
	JMP  memmove_standalone

three_bytes_standalone:
	MOVB $0xf4, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX
	JMP  memmove_standalone

two_bytes_standalone:
	MOVB $0xf0, (AX)
	MOVB SI, 1(AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP  memmove_standalone

one_byte_standalone:
	SHLB $0x02, SI
	MOVB SI, (AX)
	ADDQ $0x01, BX
	ADDQ $0x01, AX

memmove_standalone:
	NOP

	// Copy DX bytes from (CX) to (AX), dispatching on size.
emit_lit_memmove_standalone_memmove_tail:
	TESTQ DX, DX
	JEQ   emit_literal_end_avx_standalone
	CMPQ  DX, $0x02
	JBE   emit_lit_memmove_standalone_memmove_move_1or2
	CMPQ  DX, $0x04
	JB    emit_lit_memmove_standalone_memmove_move_3
	JBE   emit_lit_memmove_standalone_memmove_move_4
	CMPQ  DX, $0x08
	JB    emit_lit_memmove_standalone_memmove_move_5through7
	JE    emit_lit_memmove_standalone_memmove_move_8
	CMPQ  DX, $0x10
	JBE   emit_lit_memmove_standalone_memmove_move_9through16
	CMPQ  DX, $0x20
	JBE   emit_lit_memmove_standalone_memmove_move_17through32
	CMPQ  DX, $0x40
	JBE   emit_lit_memmove_standalone_memmove_move_33through64
	CMPQ  DX, $0x80
	JBE   emit_lit_memmove_standalone_memmove_move_65through128
	CMPQ  DX, $0x00000100
	JBE   emit_lit_memmove_standalone_memmove_move_129through256
	JMP   emit_lit_memmove_standalone_memmove_avxUnaligned

emit_lit_memmove_standalone_memmove_move_1or2:
	MOVB (CX), SI
	MOVB -1(CX)(DX*1), DI
	MOVB SI, (AX)
	MOVB DI, -1(AX)(DX*1)
	JMP  emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_4:
	MOVL (CX), SI
	MOVL SI, (AX)
	JMP  emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_3:
	MOVW (CX), SI
	MOVB 2(CX), DI
	MOVW SI, (AX)
	MOVB DI, 2(AX)
	JMP  emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_5through7:
	MOVL (CX), SI
	MOVL -4(CX)(DX*1), DI
	MOVL SI, (AX)
	MOVL DI, -4(AX)(DX*1)
	JMP  emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_8:
	MOVQ (CX), SI
	MOVQ SI, (AX)
	JMP  emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_9through16:
	MOVQ (CX), SI
	MOVQ -8(CX)(DX*1), DI
	MOVQ SI, (AX)
	MOVQ DI, -8(AX)(DX*1)
	JMP  emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_17through32:
	MOVOU (CX), X0
	MOVOU -16(CX)(DX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(DX*1)
	JMP   emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(DX*1), X2
	MOVOU -16(CX)(DX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(DX*1)
	MOVOU X3, -16(AX)(DX*1)
	JMP   emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_65through128:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU 32(CX), X2
	MOVOU 48(CX), X3
	MOVOU -64(CX)(DX*1), X12
	MOVOU -48(CX)(DX*1), X13
	MOVOU -32(CX)(DX*1), X14
	MOVOU -16(CX)(DX*1), X15
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, 32(AX)
	MOVOU X3, 48(AX)
	MOVOU X12, -64(AX)(DX*1)
	MOVOU X13, -48(AX)(DX*1)
	MOVOU X14, -32(AX)(DX*1)
	MOVOU X15, -16(AX)(DX*1)
	JMP   emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_129through256:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU 32(CX), X2
	MOVOU 48(CX), X3
	MOVOU 64(CX), X4
	MOVOU 80(CX), X5
	MOVOU 96(CX), X6
	MOVOU 112(CX), X7
	MOVOU -128(CX)(DX*1), X8
	MOVOU -112(CX)(DX*1), X9
	MOVOU -96(CX)(DX*1), X10
	MOVOU -80(CX)(DX*1), X11
	MOVOU -64(CX)(DX*1), X12
	MOVOU -48(CX)(DX*1), X13
	MOVOU -32(CX)(DX*1), X14
	MOVOU -16(CX)(DX*1), X15
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, 32(AX)
	MOVOU X3, 48(AX)
	MOVOU X4, 64(AX)
	MOVOU X5, 80(AX)
	MOVOU X6, 96(AX)
	MOVOU X7, 112(AX)
	MOVOU X8, -128(AX)(DX*1)
	MOVOU X9, -112(AX)(DX*1)
	MOVOU X10, -96(AX)(DX*1)
	MOVOU X11, -80(AX)(DX*1)
	MOVOU X12, -64(AX)(DX*1)
	MOVOU X13, -48(AX)(DX*1)
	MOVOU X14, -32(AX)(DX*1)
	MOVOU X15, -16(AX)(DX*1)
	JMP   emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_avxUnaligned:
	// More than 256 bytes.
	//   DI = end of source, R9 = original destination.
	//   X5..X12 keep the last 128 source bytes, Y4 the first 32.
	//   AX is advanced to the next 32-byte boundary so the loop can use
	//   aligned stores; R8 is the number of bytes skipped by doing so and
	//   is applied to CX and DX as well.
	LEAQ    (CX)(DX*1), DI
	MOVQ    AX, R9
	MOVOU   -128(DI), X5
	MOVOU   -112(DI), X6
	MOVQ    $0x00000080, SI

	// $-32 rather than $0xffffffe0: the mask must keep the upper 32 bits
	// of the pointer, and the negative spelling states that explicitly.
	ANDQ    $-32, AX
	ADDQ    $0x20, AX
	MOVOU   -96(DI), X7
	MOVOU   -80(DI), X8
	MOVQ    AX, R8
	SUBQ    R9, R8
	MOVOU   -64(DI), X9
	MOVOU   -48(DI), X10
	SUBQ    R8, DX
	MOVOU   -32(DI), X11
	MOVOU   -16(DI), X12
	VMOVDQU (CX), Y4
	ADDQ    R8, CX
	SUBQ    SI, DX

emit_lit_memmove_standalone_memmove_gobble_128_loop:
	// 128 bytes per iteration (SI == 128): unaligned loads, aligned stores.
	VMOVDQU (CX), Y0
	VMOVDQU 32(CX), Y1
	VMOVDQU 64(CX), Y2
	VMOVDQU 96(CX), Y3
	ADDQ    SI, CX
	VMOVDQA Y0, (AX)
	VMOVDQA Y1, 32(AX)
	VMOVDQA Y2, 64(AX)
	VMOVDQA Y3, 96(AX)
	ADDQ    SI, AX
	SUBQ    SI, DX
	JA      emit_lit_memmove_standalone_memmove_gobble_128_loop

	// DX becomes the end of the destination; write the saved first 32 and
	// last 128 bytes. VZEROUPPER precedes the legacy-SSE stores.
	ADDQ    SI, DX
	ADDQ    AX, DX
	VMOVDQU Y4, (R9)
	VZEROUPPER
	MOVOU   X5, -128(DX)
	MOVOU   X6, -112(DX)
	MOVOU   X7, -96(DX)
	MOVOU   X8, -80(DX)
	MOVOU   X9, -64(DX)
	MOVOU   X10, -48(DX)
	MOVOU   X11, -32(DX)
	MOVOU   X12, -16(DX)

emit_literal_end_avx_standalone:
	MOVQ BX, ret+48(FP)
	RET

// func emitRepeat(dst []byte, offset int, length int) int
//
// emitRepeat writes one or more "repeat" ops for length bytes at dst and
// returns the number of bytes written. dst is never bounds-checked.
// With l = length-4 the op written is:
//   length <= 8                    2 bytes: l<<2|1, 0
//   length < 12 && offset < 2048   2 bytes: (offset>>8)<<5 | l<<2 | 1, offset&0xff
//   l < 260                        3 bytes: 0x15, 0, l-4
//   l < 65792                      4 bytes: 0x19, 0, l-256 (16-bit LE)
//   l < 0x0100ffff                 5 bytes: 0x1d, 0, l-65536 (24-bit LE)
//   otherwise                      5 bytes: 0x1d, 0, 0xfb, 0xff, 0xff, and the
//                                  remaining length is processed again.
//
// Registers: AX = write cursor, BX = bytes written, CX = offset,
// DX = length (then l), SI = scratch.
TEXT ·emitRepeat(SB), NOSPLIT, $0-48
	XORQ BX, BX
	MOVQ dst_base+0(FP), AX
	MOVQ offset+24(FP), CX
	MOVQ length+32(FP), DX

emit_repeat_again_standalone:
	MOVQ DX, SI
	LEAQ -4(DX), DX
	CMPL SI, $0x08
	JLE  repeat_two_standalone
	CMPL SI, $0x0c
	JGE  cant_repeat_two_offset_standalone
	CMPL CX, $0x00000800
	JLT  repeat_two_offset_standalone

cant_repeat_two_offset_standalone:
	CMPL DX, $0x00000104
	JLT  repeat_three_standalone
	CMPL DX, $0x00010100
	JLT  repeat_four_standalone
	CMPL DX, $0x0100ffff
	JLT  repeat_five_standalone

	// Too long for one op: emit a maximal 5-byte op and go around again
	// with what is left.
	LEAQ -16842747(DX), DX
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	ADDQ $0x05, BX
	JMP  emit_repeat_again_standalone

repeat_five_standalone:
	LEAQ -65536(DX), DX
	MOVQ DX, CX
	MOVW $0x001d, (AX)
	MOVW DX, 2(AX)
	SARQ $0x10, CX
	MOVB CL, 4(AX)
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	JMP  gen_emit_repeat_end

repeat_four_standalone:
	LEAQ -256(DX), DX
	MOVW $0x0019, (AX)
	MOVW DX, 2(AX)
	ADDQ $0x04, BX
	ADDQ $0x04, AX
	JMP  gen_emit_repeat_end

repeat_three_standalone:
	LEAQ -4(DX), DX
	MOVW $0x0015, (AX)
	MOVB DL, 2(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX
	JMP  gen_emit_repeat_end

repeat_two_standalone:
	SHLL $0x02, DX
	ORL  $0x01, DX
	MOVW DX, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP  gen_emit_repeat_end

repeat_two_offset_standalone:
	// DX = l<<2 | 1, then merge (offset>>8)<<5 into the first byte.
	XORQ SI, SI
	LEAQ 1(SI)(DX*4), DX
	MOVB CL, 1(AX)
	SARL $0x08, CX
	SHLL $0x05, CX
	ORL  CX, DX
	MOVB DL, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX

gen_emit_repeat_end:
	MOVQ BX, ret+40(FP)
	RET

// func emitCopy(dst []byte, offset int, length int) int
//
// emitCopy writes copy ops for (offset, length) at dst and returns the number
// of bytes written. dst is never bounds-checked.
//
// offset >= 65536:
//   length > 64:  0xff + 32-bit LE offset (5 bytes), length -= 64; a remainder
//                 below 4 falls into the case below, anything else is written
//                 as repeat ops (same encoding as emitRepeat).
//   length <= 64: nothing if length == 0, otherwise
//                 (length-1)<<2|3 + 32-bit LE offset (5 bytes).
// offset < 65536:
//   length > 64:  0xee + 16-bit LE offset (3 bytes), length -= 60, followed by
//                 repeat ops for the remainder.
//   length < 12 && offset < 2048:
//                 (offset>>8)<<5 | (length-4)<<2 | 1, offset&0xff (2 bytes).
//   otherwise:    (length-1)<<2|2 + 16-bit LE offset (3 bytes).
//
// The 32-bit offset is stored with MOVL. The Go amd64 assembler accepts MOVD
// as another spelling of MOVQ, which would store 8 bytes at dst+1 and clobber
// 4 bytes beyond the 5-byte op.
//
// Registers: AX = write cursor, BX = bytes written, CX = offset,
// DX = length, SI = scratch.
TEXT ·emitCopy(SB), NOSPLIT, $0-48
	XORQ BX, BX
	MOVQ dst_base+0(FP), AX
	MOVQ offset+24(FP), CX
	MOVQ length+32(FP), DX
	CMPL CX, $0x00010000
	JL   two_byte_offset_standalone
	CMPL DX, $0x40
	JLE  four_bytes_remain_standalone
	MOVB $0xff, (AX)
	MOVL CX, 1(AX)
	LEAQ -64(DX), DX
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	CMPL DX, $0x04
	JL   four_bytes_remain_standalone

emit_repeat_again_standalone_emit_copy:
	MOVQ DX, SI
	LEAQ -4(DX), DX
	CMPL SI, $0x08
	JLE  repeat_two_standalone_emit_copy
	CMPL SI, $0x0c
	JGE  cant_repeat_two_offset_standalone_emit_copy
	CMPL CX, $0x00000800
	JLT  repeat_two_offset_standalone_emit_copy

cant_repeat_two_offset_standalone_emit_copy:
	CMPL DX, $0x00000104
	JLT  repeat_three_standalone_emit_copy
	CMPL DX, $0x00010100
	JLT  repeat_four_standalone_emit_copy
	CMPL DX, $0x0100ffff
	JLT  repeat_five_standalone_emit_copy
	LEAQ -16842747(DX), DX
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	ADDQ $0x05, BX
	JMP  emit_repeat_again_standalone_emit_copy

repeat_five_standalone_emit_copy:
	LEAQ -65536(DX), DX
	MOVQ DX, CX
	MOVW $0x001d, (AX)
	MOVW DX, 2(AX)
	SARQ $0x10, CX
	MOVB CL, 4(AX)
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	JMP  gen_emit_copy_end

repeat_four_standalone_emit_copy:
	LEAQ -256(DX), DX
	MOVW $0x0019, (AX)
	MOVW DX, 2(AX)
	ADDQ $0x04, BX
	ADDQ $0x04, AX
	JMP  gen_emit_copy_end

repeat_three_standalone_emit_copy:
	LEAQ -4(DX), DX
	MOVW $0x0015, (AX)
	MOVB DL, 2(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX
	JMP  gen_emit_copy_end

repeat_two_standalone_emit_copy:
	SHLL $0x02, DX
	ORL  $0x01, DX
	MOVW DX, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP  gen_emit_copy_end

repeat_two_offset_standalone_emit_copy:
	XORQ SI, SI
	LEAQ 1(SI)(DX*4), DX
	MOVB CL, 1(AX)
	SARL $0x08, CX
	SHLL $0x05, CX
	ORL  CX, DX
	MOVB DL, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP  gen_emit_copy_end

four_bytes_remain_standalone:
	TESTL DX, DX
	JZ    gen_emit_copy_end

	// Only the low byte of SI is set; that is sufficient because only DL
	// of the LEAQ result is stored.
	MOVB $0x03, SI
	LEAQ -4(SI)(DX*4), DX
	MOVB DL, (AX)
	MOVL CX, 1(AX)
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	JMP  gen_emit_copy_end

two_byte_offset_standalone:
	CMPL DX, $0x40
	JLE  two_byte_offset_short_standalone
	MOVB $0xee, (AX)
	MOVW CX, 1(AX)
	LEAQ -60(DX), DX
	ADDQ $0x03, AX
	ADDQ $0x03, BX

emit_repeat_again_standalone_emit_copy_short:
	MOVQ DX, SI
	LEAQ -4(DX), DX
	CMPL SI, $0x08
	JLE  repeat_two_standalone_emit_copy_short
	CMPL SI, $0x0c
	JGE  cant_repeat_two_offset_standalone_emit_copy_short
	CMPL CX, $0x00000800
	JLT  repeat_two_offset_standalone_emit_copy_short

cant_repeat_two_offset_standalone_emit_copy_short:
	CMPL DX, $0x00000104
	JLT  repeat_three_standalone_emit_copy_short
	CMPL DX, $0x00010100
	JLT  repeat_four_standalone_emit_copy_short
	CMPL DX, $0x0100ffff
	JLT  repeat_five_standalone_emit_copy_short
	LEAQ -16842747(DX), DX
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	ADDQ $0x05, BX
	JMP  emit_repeat_again_standalone_emit_copy_short

repeat_five_standalone_emit_copy_short:
	LEAQ -65536(DX), DX
	MOVQ DX, CX
	MOVW $0x001d, (AX)
	MOVW DX, 2(AX)
	SARQ $0x10, CX
	MOVB CL, 4(AX)
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	JMP  gen_emit_copy_end

repeat_four_standalone_emit_copy_short:
	LEAQ -256(DX), DX
	MOVW $0x0019, (AX)
	MOVW DX, 2(AX)
	ADDQ $0x04, BX
	ADDQ $0x04, AX
	JMP  gen_emit_copy_end

repeat_three_standalone_emit_copy_short:
	LEAQ -4(DX), DX
	MOVW $0x0015, (AX)
	MOVB DL, 2(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX
	JMP  gen_emit_copy_end

repeat_two_standalone_emit_copy_short:
	SHLL $0x02, DX
	ORL  $0x01, DX
	MOVW DX, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP  gen_emit_copy_end

repeat_two_offset_standalone_emit_copy_short:
	XORQ SI, SI
	LEAQ 1(SI)(DX*4), DX
	MOVB CL, 1(AX)
	SARL $0x08, CX
	SHLL $0x05, CX
	ORL  CX, DX
	MOVB DL, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP  gen_emit_copy_end

two_byte_offset_short_standalone:
	CMPL DX, $0x0c
	JGE  emit_copy_three_standalone
	CMPL CX, $0x00000800
	JGE  emit_copy_three_standalone

	// 2-byte op; as above only the low byte of SI/DX matters.
	MOVB $0x01, SI
	LEAQ -16(SI)(DX*4), DX
	MOVB CL, 1(AX)
	SHRL $0x08, CX
	SHLL $0x05, CX
	ORL  CX, DX
	MOVB DL, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP  gen_emit_copy_end

emit_copy_three_standalone:
	MOVB $0x02, SI
	LEAQ -4(SI)(DX*4), DX
	MOVB DL, (AX)
	MOVW CX, 1(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX

gen_emit_copy_end:
	MOVQ BX, ret+40(FP)
	RET

// func matchLen(a []byte, b []byte) int
//
// matchLen returns the number of leading bytes that are equal in a and b.
// Only len(a) is read; b is assumed to be at least as long (not checked here).
//
// Registers: AX = a, CX = b, DX = bytes left to compare, SI = match length,
// BX = scratch.
TEXT ·matchLen(SB), NOSPLIT, $0-56
	MOVQ a_base+0(FP), AX
	MOVQ b_base+24(FP), CX
	MOVQ a_len+8(FP), DX
	XORQ SI, SI
	CMPQ DX, $0x08
	JL   matchlen_single_standalone

matchlen_loopback_standalone:
	// Compare 8 bytes at once; a non-zero XOR marks the first difference
	// and its lowest set bit index / 8 is the number of equal bytes.
	MOVQ  (AX)(SI*1), BX
	XORQ  (CX)(SI*1), BX
	TESTQ BX, BX
	JZ    matchlen_loop_standalone
	BSFQ  BX, BX
	SARQ  $0x03, BX
	LEAQ  (SI)(BX*1), SI
	JMP   gen_match_len_end

matchlen_loop_standalone:
	LEAQ -8(DX), DX
	LEAQ 8(SI), SI
	CMPQ DX, $0x08
	JGE  matchlen_loopback_standalone

matchlen_single_standalone:
	TESTQ DX, DX
	JZ    gen_match_len_end

matchlen_single_loopback_standalone:
	// Fewer than 8 bytes left: compare one byte at a time.
	MOVB (AX)(SI*1), BL
	CMPB (CX)(SI*1), BL
	JNE  gen_match_len_end
	LEAQ 1(SI), SI
	DECQ DX
	JNZ  matchlen_single_loopback_standalone

gen_match_len_end:
	MOVQ SI, ret+48(FP)
	RET