printer: use tabwriter to align instructions (#8)
This commit is contained in:
30
README.md
30
README.md
@@ -65,10 +65,10 @@ After running `go generate` the [`add.s`](examples/add/add.s) file will contain
|
|||||||
|
|
||||||
// func Add(x uint64, y uint64) uint64
|
// func Add(x uint64, y uint64) uint64
|
||||||
TEXT ·Add(SB), NOSPLIT, $0-24
|
TEXT ·Add(SB), NOSPLIT, $0-24
|
||||||
MOVQ x(FP), AX
|
MOVQ x(FP), AX
|
||||||
MOVQ y+8(FP), CX
|
MOVQ y+8(FP), CX
|
||||||
ADDQ AX, CX
|
ADDQ AX, CX
|
||||||
MOVQ CX, ret+16(FP)
|
MOVQ CX, ret+16(FP)
|
||||||
RET
|
RET
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -137,18 +137,20 @@ The result from this code generator is:
|
|||||||
|
|
||||||
// func Sum(xs []uint64) uint64
|
// func Sum(xs []uint64) uint64
|
||||||
TEXT ·Sum(SB), NOSPLIT, $0-32
|
TEXT ·Sum(SB), NOSPLIT, $0-32
|
||||||
MOVQ xs_base(FP), AX
|
MOVQ xs_base(FP), AX
|
||||||
MOVQ xs_len+8(FP), CX
|
MOVQ xs_len+8(FP), CX
|
||||||
XORQ DX, DX
|
XORQ DX, DX
|
||||||
|
|
||||||
loop:
|
loop:
|
||||||
CMPQ CX, $0x00
|
CMPQ CX, $0x00
|
||||||
JE done
|
JE done
|
||||||
ADDQ (AX), DX
|
ADDQ (AX), DX
|
||||||
ADDQ $0x08, AX
|
ADDQ $0x08, AX
|
||||||
DECQ CX
|
DECQ CX
|
||||||
JMP loop
|
JMP loop
|
||||||
|
|
||||||
done:
|
done:
|
||||||
MOVQ DX, ret+24(FP)
|
MOVQ DX, ret+24(FP)
|
||||||
RET
|
RET
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
@@ -43,9 +43,9 @@ This produces [`add.s`](add.s) as follows:
|
|||||||
|
|
||||||
// func Add(x uint64, y uint64) uint64
|
// func Add(x uint64, y uint64) uint64
|
||||||
TEXT ·Add(SB), NOSPLIT, $0-24
|
TEXT ·Add(SB), NOSPLIT, $0-24
|
||||||
MOVQ x(FP), AX
|
MOVQ x(FP), AX
|
||||||
MOVQ y+8(FP), CX
|
MOVQ y+8(FP), CX
|
||||||
ADDQ AX, CX
|
ADDQ AX, CX
|
||||||
MOVQ CX, ret+16(FP)
|
MOVQ CX, ret+16(FP)
|
||||||
RET
|
RET
|
||||||
```
|
```
|
||||||
|
|||||||
@@ -4,8 +4,8 @@
|
|||||||
|
|
||||||
// func Add(x uint64, y uint64) uint64
|
// func Add(x uint64, y uint64) uint64
|
||||||
TEXT ·Add(SB), NOSPLIT, $0-24
|
TEXT ·Add(SB), NOSPLIT, $0-24
|
||||||
MOVQ x(FP), AX
|
MOVQ x(FP), AX
|
||||||
MOVQ y+8(FP), CX
|
MOVQ y+8(FP), CX
|
||||||
ADDQ AX, CX
|
ADDQ AX, CX
|
||||||
MOVQ CX, ret+16(FP)
|
MOVQ CX, ret+16(FP)
|
||||||
RET
|
RET
|
||||||
|
|||||||
@@ -20,8 +20,8 @@ This `avo` code will generate the following assembly. Note that parameter refere
|
|||||||
```s
|
```s
|
||||||
// func Second(x int32, y int32) int32
|
// func Second(x int32, y int32) int32
|
||||||
TEXT ·Second(SB), NOSPLIT, $0-12
|
TEXT ·Second(SB), NOSPLIT, $0-12
|
||||||
MOVL y+4(FP), AX
|
MOVL y+4(FP), AX
|
||||||
MOVL AX, ret+8(FP)
|
MOVL AX, ret+8(FP)
|
||||||
RET
|
RET
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
@@ -4,108 +4,108 @@
|
|||||||
|
|
||||||
// func Second(x int32, y int32) int32
|
// func Second(x int32, y int32) int32
|
||||||
TEXT ·Second(SB), NOSPLIT, $0-12
|
TEXT ·Second(SB), NOSPLIT, $0-12
|
||||||
MOVL y+4(FP), AX
|
MOVL y+4(FP), AX
|
||||||
MOVL AX, ret+8(FP)
|
MOVL AX, ret+8(FP)
|
||||||
RET
|
RET
|
||||||
|
|
||||||
// func StringLen(s string) int
|
// func StringLen(s string) int
|
||||||
TEXT ·StringLen(SB), NOSPLIT, $0-24
|
TEXT ·StringLen(SB), NOSPLIT, $0-24
|
||||||
MOVQ s_len+8(FP), AX
|
MOVQ s_len+8(FP), AX
|
||||||
MOVQ AX, ret+16(FP)
|
MOVQ AX, ret+16(FP)
|
||||||
RET
|
RET
|
||||||
|
|
||||||
// func SliceLen(s []int) int
|
// func SliceLen(s []int) int
|
||||||
TEXT ·SliceLen(SB), NOSPLIT, $0-32
|
TEXT ·SliceLen(SB), NOSPLIT, $0-32
|
||||||
MOVQ s_len+8(FP), AX
|
MOVQ s_len+8(FP), AX
|
||||||
MOVQ AX, ret+24(FP)
|
MOVQ AX, ret+24(FP)
|
||||||
RET
|
RET
|
||||||
|
|
||||||
// func SliceCap(s []int) int
|
// func SliceCap(s []int) int
|
||||||
TEXT ·SliceCap(SB), NOSPLIT, $0-32
|
TEXT ·SliceCap(SB), NOSPLIT, $0-32
|
||||||
MOVQ s_cap+16(FP), AX
|
MOVQ s_cap+16(FP), AX
|
||||||
MOVQ AX, ret+24(FP)
|
MOVQ AX, ret+24(FP)
|
||||||
RET
|
RET
|
||||||
|
|
||||||
// func ArrayThree(a [7]uint64) uint64
|
// func ArrayThree(a [7]uint64) uint64
|
||||||
TEXT ·ArrayThree(SB), NOSPLIT, $0-64
|
TEXT ·ArrayThree(SB), NOSPLIT, $0-64
|
||||||
MOVQ a_3+24(FP), AX
|
MOVQ a_3+24(FP), AX
|
||||||
MOVQ AX, ret+56(FP)
|
MOVQ AX, ret+56(FP)
|
||||||
RET
|
RET
|
||||||
|
|
||||||
// func FieldByte(s Struct) byte
|
// func FieldByte(s Struct) byte
|
||||||
TEXT ·FieldByte(SB), NOSPLIT, $0-177
|
TEXT ·FieldByte(SB), NOSPLIT, $0-177
|
||||||
MOVB s_Byte(FP), AL
|
MOVB s_Byte(FP), AL
|
||||||
MOVB AL, ret+176(FP)
|
MOVB AL, ret+176(FP)
|
||||||
RET
|
RET
|
||||||
|
|
||||||
// func FieldInt8(s Struct) int8
|
// func FieldInt8(s Struct) int8
|
||||||
TEXT ·FieldInt8(SB), NOSPLIT, $0-177
|
TEXT ·FieldInt8(SB), NOSPLIT, $0-177
|
||||||
MOVB s_Int8+1(FP), AL
|
MOVB s_Int8+1(FP), AL
|
||||||
MOVB AL, ret+176(FP)
|
MOVB AL, ret+176(FP)
|
||||||
RET
|
RET
|
||||||
|
|
||||||
// func FieldUint16(s Struct) uint16
|
// func FieldUint16(s Struct) uint16
|
||||||
TEXT ·FieldUint16(SB), NOSPLIT, $0-178
|
TEXT ·FieldUint16(SB), NOSPLIT, $0-178
|
||||||
MOVW s_Uint16+2(FP), AX
|
MOVW s_Uint16+2(FP), AX
|
||||||
MOVW AX, ret+176(FP)
|
MOVW AX, ret+176(FP)
|
||||||
RET
|
RET
|
||||||
|
|
||||||
// func FieldInt32(s Struct) int32
|
// func FieldInt32(s Struct) int32
|
||||||
TEXT ·FieldInt32(SB), NOSPLIT, $0-180
|
TEXT ·FieldInt32(SB), NOSPLIT, $0-180
|
||||||
MOVL s_Int32+4(FP), AX
|
MOVL s_Int32+4(FP), AX
|
||||||
MOVL AX, ret+176(FP)
|
MOVL AX, ret+176(FP)
|
||||||
RET
|
RET
|
||||||
|
|
||||||
// func FieldUint64(s Struct) uint64
|
// func FieldUint64(s Struct) uint64
|
||||||
TEXT ·FieldUint64(SB), NOSPLIT, $0-184
|
TEXT ·FieldUint64(SB), NOSPLIT, $0-184
|
||||||
MOVQ s_Uint64+8(FP), AX
|
MOVQ s_Uint64+8(FP), AX
|
||||||
MOVQ AX, ret+176(FP)
|
MOVQ AX, ret+176(FP)
|
||||||
RET
|
RET
|
||||||
|
|
||||||
// func FieldFloat32(s Struct) float32
|
// func FieldFloat32(s Struct) float32
|
||||||
TEXT ·FieldFloat32(SB), NOSPLIT, $0-180
|
TEXT ·FieldFloat32(SB), NOSPLIT, $0-180
|
||||||
MOVSS s_Float32+16(FP), X0
|
MOVSS s_Float32+16(FP), X0
|
||||||
MOVSS X0, ret+176(FP)
|
MOVSS X0, ret+176(FP)
|
||||||
RET
|
RET
|
||||||
|
|
||||||
// func FieldFloat64(s Struct) float64
|
// func FieldFloat64(s Struct) float64
|
||||||
TEXT ·FieldFloat64(SB), NOSPLIT, $0-184
|
TEXT ·FieldFloat64(SB), NOSPLIT, $0-184
|
||||||
MOVSD s_Float64+24(FP), X0
|
MOVSD s_Float64+24(FP), X0
|
||||||
MOVSD X0, ret+176(FP)
|
MOVSD X0, ret+176(FP)
|
||||||
RET
|
RET
|
||||||
|
|
||||||
// func FieldStringLen(s Struct) int
|
// func FieldStringLen(s Struct) int
|
||||||
TEXT ·FieldStringLen(SB), NOSPLIT, $0-184
|
TEXT ·FieldStringLen(SB), NOSPLIT, $0-184
|
||||||
MOVQ s_String_len+40(FP), AX
|
MOVQ s_String_len+40(FP), AX
|
||||||
MOVQ AX, ret+176(FP)
|
MOVQ AX, ret+176(FP)
|
||||||
RET
|
RET
|
||||||
|
|
||||||
// func FieldSliceCap(s Struct) int
|
// func FieldSliceCap(s Struct) int
|
||||||
TEXT ·FieldSliceCap(SB), NOSPLIT, $0-184
|
TEXT ·FieldSliceCap(SB), NOSPLIT, $0-184
|
||||||
MOVQ s_Slice_cap+64(FP), AX
|
MOVQ s_Slice_cap+64(FP), AX
|
||||||
MOVQ AX, ret+176(FP)
|
MOVQ AX, ret+176(FP)
|
||||||
RET
|
RET
|
||||||
|
|
||||||
// func FieldArrayTwoBTwo(s Struct) byte
|
// func FieldArrayTwoBTwo(s Struct) byte
|
||||||
TEXT ·FieldArrayTwoBTwo(SB), NOSPLIT, $0-177
|
TEXT ·FieldArrayTwoBTwo(SB), NOSPLIT, $0-177
|
||||||
MOVB s_Array_2_B_2+114(FP), AL
|
MOVB s_Array_2_B_2+114(FP), AL
|
||||||
MOVB AL, ret+176(FP)
|
MOVB AL, ret+176(FP)
|
||||||
RET
|
RET
|
||||||
|
|
||||||
// func FieldArrayOneC(s Struct) uint16
|
// func FieldArrayOneC(s Struct) uint16
|
||||||
TEXT ·FieldArrayOneC(SB), NOSPLIT, $0-178
|
TEXT ·FieldArrayOneC(SB), NOSPLIT, $0-178
|
||||||
MOVW s_Array_1_C+100(FP), AX
|
MOVW s_Array_1_C+100(FP), AX
|
||||||
MOVW AX, ret+176(FP)
|
MOVW AX, ret+176(FP)
|
||||||
RET
|
RET
|
||||||
|
|
||||||
// func FieldComplex64Imag(s Struct) float32
|
// func FieldComplex64Imag(s Struct) float32
|
||||||
TEXT ·FieldComplex64Imag(SB), NOSPLIT, $0-180
|
TEXT ·FieldComplex64Imag(SB), NOSPLIT, $0-180
|
||||||
MOVSS s_Complex64_imag+156(FP), X0
|
MOVSS s_Complex64_imag+156(FP), X0
|
||||||
MOVSS X0, ret+176(FP)
|
MOVSS X0, ret+176(FP)
|
||||||
RET
|
RET
|
||||||
|
|
||||||
// func FieldComplex128Real(s Struct) float64
|
// func FieldComplex128Real(s Struct) float64
|
||||||
TEXT ·FieldComplex128Real(SB), NOSPLIT, $0-184
|
TEXT ·FieldComplex128Real(SB), NOSPLIT, $0-184
|
||||||
MOVSD s_Complex128_real+160(FP), X0
|
MOVSD s_Complex128_real+160(FP), X0
|
||||||
MOVSD X0, ret+176(FP)
|
MOVSD X0, ret+176(FP)
|
||||||
RET
|
RET
|
||||||
|
|||||||
BIN
examples/backup.tar.gz
Normal file
BIN
examples/backup.tar.gz
Normal file
Binary file not shown.
@@ -25,12 +25,12 @@ Generated assembly:
|
|||||||
```s
|
```s
|
||||||
// func Norm(z complex128) float64
|
// func Norm(z complex128) float64
|
||||||
TEXT ·Norm(SB), NOSPLIT, $0-24
|
TEXT ·Norm(SB), NOSPLIT, $0-24
|
||||||
MOVSD z_real(FP), X0
|
MOVSD z_real(FP), X0
|
||||||
MOVSD z_imag+8(FP), X1
|
MOVSD z_imag+8(FP), X1
|
||||||
MULSD X0, X0
|
MULSD X0, X0
|
||||||
MULSD X1, X1
|
MULSD X1, X1
|
||||||
ADDSD X1, X0
|
ADDSD X1, X0
|
||||||
SQRTSD X0, X2
|
SQRTSD X0, X2
|
||||||
MOVSD X2, ret+16(FP)
|
MOVSD X2, ret+16(FP)
|
||||||
RET
|
RET
|
||||||
```
|
```
|
||||||
|
|||||||
@@ -4,23 +4,23 @@
|
|||||||
|
|
||||||
// func Real(z complex128) float64
|
// func Real(z complex128) float64
|
||||||
TEXT ·Real(SB), NOSPLIT, $0-24
|
TEXT ·Real(SB), NOSPLIT, $0-24
|
||||||
MOVSD z_real(FP), X0
|
MOVSD z_real(FP), X0
|
||||||
MOVSD X0, ret+16(FP)
|
MOVSD X0, ret+16(FP)
|
||||||
RET
|
RET
|
||||||
|
|
||||||
// func Imag(z complex128) float64
|
// func Imag(z complex128) float64
|
||||||
TEXT ·Imag(SB), NOSPLIT, $0-24
|
TEXT ·Imag(SB), NOSPLIT, $0-24
|
||||||
MOVSD z_imag+8(FP), X0
|
MOVSD z_imag+8(FP), X0
|
||||||
MOVSD X0, ret+16(FP)
|
MOVSD X0, ret+16(FP)
|
||||||
RET
|
RET
|
||||||
|
|
||||||
// func Norm(z complex128) float64
|
// func Norm(z complex128) float64
|
||||||
TEXT ·Norm(SB), NOSPLIT, $0-24
|
TEXT ·Norm(SB), NOSPLIT, $0-24
|
||||||
MOVSD z_real(FP), X0
|
MOVSD z_real(FP), X0
|
||||||
MOVSD z_imag+8(FP), X1
|
MOVSD z_imag+8(FP), X1
|
||||||
MULSD X0, X0
|
MULSD X0, X0
|
||||||
MULSD X1, X1
|
MULSD X1, X1
|
||||||
ADDSD X1, X0
|
ADDSD X1, X0
|
||||||
SQRTSD X0, X2
|
SQRTSD X0, X2
|
||||||
MOVSD X2, ret+16(FP)
|
MOVSD X2, ret+16(FP)
|
||||||
RET
|
RET
|
||||||
|
|||||||
@@ -14,8 +14,8 @@ GLOBL bytes<>(SB), RODATA|NOPTR, $40
|
|||||||
|
|
||||||
// func DataAt(i int) byte
|
// func DataAt(i int) byte
|
||||||
TEXT ·DataAt(SB), NOSPLIT, $0-9
|
TEXT ·DataAt(SB), NOSPLIT, $0-9
|
||||||
MOVQ i(FP), AX
|
MOVQ i(FP), AX
|
||||||
LEAQ bytes<>(SB), CX
|
LEAQ bytes<>(SB), CX
|
||||||
MOVB (CX)(AX*1), AL
|
MOVB (CX)(AX*1), AL
|
||||||
MOVB AL, ret+8(FP)
|
MOVB AL, ret+8(FP)
|
||||||
RET
|
RET
|
||||||
|
|||||||
@@ -4,55 +4,59 @@
|
|||||||
|
|
||||||
// func Dot(x []float32, y []float32) float32
|
// func Dot(x []float32, y []float32) float32
|
||||||
TEXT ·Dot(SB), NOSPLIT, $0-52
|
TEXT ·Dot(SB), NOSPLIT, $0-52
|
||||||
MOVQ x_base(FP), AX
|
MOVQ x_base(FP), AX
|
||||||
MOVQ y_base+24(FP), CX
|
MOVQ y_base+24(FP), CX
|
||||||
MOVQ x_len+8(FP), DX
|
MOVQ x_len+8(FP), DX
|
||||||
VXORPS Y0, Y0, Y0
|
VXORPS Y0, Y0, Y0
|
||||||
VXORPS Y1, Y1, Y1
|
VXORPS Y1, Y1, Y1
|
||||||
VXORPS Y2, Y2, Y2
|
VXORPS Y2, Y2, Y2
|
||||||
VXORPS Y3, Y3, Y3
|
VXORPS Y3, Y3, Y3
|
||||||
VXORPS Y4, Y4, Y4
|
VXORPS Y4, Y4, Y4
|
||||||
VXORPS Y5, Y5, Y5
|
VXORPS Y5, Y5, Y5
|
||||||
|
|
||||||
blockloop:
|
blockloop:
|
||||||
CMPQ DX, $0x00000030
|
CMPQ DX, $0x00000030
|
||||||
JL tail
|
JL tail
|
||||||
VMOVUPS (AX), Y6
|
VMOVUPS (AX), Y6
|
||||||
VMOVUPS 32(AX), Y7
|
VMOVUPS 32(AX), Y7
|
||||||
VMOVUPS 64(AX), Y8
|
VMOVUPS 64(AX), Y8
|
||||||
VMOVUPS 96(AX), Y9
|
VMOVUPS 96(AX), Y9
|
||||||
VMOVUPS 128(AX), Y10
|
VMOVUPS 128(AX), Y10
|
||||||
VMOVUPS 160(AX), Y11
|
VMOVUPS 160(AX), Y11
|
||||||
VFMADD231PS (CX), Y6, Y0
|
VFMADD231PS (CX), Y6, Y0
|
||||||
VFMADD231PS 32(CX), Y7, Y1
|
VFMADD231PS 32(CX), Y7, Y1
|
||||||
VFMADD231PS 64(CX), Y8, Y2
|
VFMADD231PS 64(CX), Y8, Y2
|
||||||
VFMADD231PS 96(CX), Y9, Y3
|
VFMADD231PS 96(CX), Y9, Y3
|
||||||
VFMADD231PS 128(CX), Y10, Y4
|
VFMADD231PS 128(CX), Y10, Y4
|
||||||
VFMADD231PS 160(CX), Y11, Y5
|
VFMADD231PS 160(CX), Y11, Y5
|
||||||
ADDQ $0x000000c0, AX
|
ADDQ $0x000000c0, AX
|
||||||
ADDQ $0x000000c0, CX
|
ADDQ $0x000000c0, CX
|
||||||
SUBQ $0x00000030, DX
|
SUBQ $0x00000030, DX
|
||||||
JMP blockloop
|
JMP blockloop
|
||||||
|
|
||||||
tail:
|
tail:
|
||||||
VXORPS X12, X12, X12
|
VXORPS X12, X12, X12
|
||||||
|
|
||||||
tailloop:
|
tailloop:
|
||||||
CMPQ DX, $0x00000000
|
CMPQ DX, $0x00000000
|
||||||
JE reduce
|
JE reduce
|
||||||
VMOVSS (AX), X6
|
VMOVSS (AX), X6
|
||||||
VFMADD231SS (CX), X6, X12
|
VFMADD231SS (CX), X6, X12
|
||||||
ADDQ $0x00000004, AX
|
ADDQ $0x00000004, AX
|
||||||
ADDQ $0x00000004, CX
|
ADDQ $0x00000004, CX
|
||||||
DECQ DX
|
DECQ DX
|
||||||
JMP tailloop
|
JMP tailloop
|
||||||
|
|
||||||
reduce:
|
reduce:
|
||||||
VADDPS Y0, Y1, Y0
|
VADDPS Y0, Y1, Y0
|
||||||
VADDPS Y0, Y2, Y0
|
VADDPS Y0, Y2, Y0
|
||||||
VADDPS Y0, Y3, Y0
|
VADDPS Y0, Y3, Y0
|
||||||
VADDPS Y0, Y4, Y0
|
VADDPS Y0, Y4, Y0
|
||||||
VADDPS Y0, Y5, Y0
|
VADDPS Y0, Y5, Y0
|
||||||
VEXTRACTF128 $0x01, Y0, X1
|
VEXTRACTF128 $0x01, Y0, X1
|
||||||
VADDPS X0, X1, X0
|
VADDPS X0, X1, X0
|
||||||
VADDPS X0, X12, X0
|
VADDPS X0, X12, X0
|
||||||
VHADDPS X0, X0, X0
|
VHADDPS X0, X0, X0
|
||||||
VHADDPS X0, X0, X0
|
VHADDPS X0, X0, X0
|
||||||
MOVSS X0, ret+48(FP)
|
MOVSS X0, ret+48(FP)
|
||||||
RET
|
RET
|
||||||
|
|||||||
@@ -4,19 +4,21 @@
|
|||||||
|
|
||||||
// func Hash64(data []byte) uint64
|
// func Hash64(data []byte) uint64
|
||||||
TEXT ·Hash64(SB), NOSPLIT, $0-32
|
TEXT ·Hash64(SB), NOSPLIT, $0-32
|
||||||
MOVQ data_base(FP), CX
|
MOVQ data_base(FP), CX
|
||||||
MOVQ data_len+8(FP), BX
|
MOVQ data_len+8(FP), BX
|
||||||
MOVQ $0xcbf29ce484222325, AX
|
MOVQ $0xcbf29ce484222325, AX
|
||||||
MOVQ $0x00000100000001b3, BP
|
MOVQ $0x00000100000001b3, BP
|
||||||
|
|
||||||
loop:
|
loop:
|
||||||
CMPQ BX, $0x00
|
CMPQ BX, $0x00
|
||||||
JE done
|
JE done
|
||||||
MOVBQZX (CX), DX
|
MOVBQZX (CX), DX
|
||||||
XORQ DX, AX
|
XORQ DX, AX
|
||||||
MULQ BP
|
MULQ BP
|
||||||
INCQ CX
|
INCQ CX
|
||||||
DECQ BX
|
DECQ BX
|
||||||
JMP loop
|
JMP loop
|
||||||
|
|
||||||
done:
|
done:
|
||||||
MOVQ AX, ret+24(FP)
|
MOVQ AX, ret+24(FP)
|
||||||
RET
|
RET
|
||||||
|
|||||||
@@ -4,21 +4,21 @@
|
|||||||
|
|
||||||
// func EncodeInt(lat float64, lng float64) uint64
|
// func EncodeInt(lat float64, lng float64) uint64
|
||||||
TEXT ·EncodeInt(SB), NOSPLIT, $0-24
|
TEXT ·EncodeInt(SB), NOSPLIT, $0-24
|
||||||
MOVSD lat(FP), X0
|
MOVSD lat(FP), X0
|
||||||
MOVSD lng+8(FP), X1
|
MOVSD lng+8(FP), X1
|
||||||
MULSD reciprocal180<>(SB), X0
|
MULSD reciprocal180<>(SB), X0
|
||||||
ADDSD onepointfive<>(SB), X0
|
ADDSD onepointfive<>(SB), X0
|
||||||
MULSD reciprocal360<>(SB), X1
|
MULSD reciprocal360<>(SB), X1
|
||||||
ADDSD onepointfive<>(SB), X1
|
ADDSD onepointfive<>(SB), X1
|
||||||
MOVQ X0, CX
|
MOVQ X0, CX
|
||||||
SHRQ $0x14, CX
|
SHRQ $0x14, CX
|
||||||
MOVQ X1, AX
|
MOVQ X1, AX
|
||||||
SHRQ $0x14, AX
|
SHRQ $0x14, AX
|
||||||
PDEPQ mask<>(SB), CX, CX
|
PDEPQ mask<>(SB), CX, CX
|
||||||
PDEPQ mask<>(SB), AX, AX
|
PDEPQ mask<>(SB), AX, AX
|
||||||
SHLQ $0x01, AX
|
SHLQ $0x01, AX
|
||||||
XORQ AX, CX
|
XORQ AX, CX
|
||||||
MOVQ CX, ret+16(FP)
|
MOVQ CX, ret+16(FP)
|
||||||
RET
|
RET
|
||||||
|
|
||||||
DATA reciprocal180<>(SB)/8, $(0.005555555555555556)
|
DATA reciprocal180<>(SB)/8, $(0.005555555555555556)
|
||||||
|
|||||||
@@ -4,43 +4,43 @@
|
|||||||
|
|
||||||
// func Interval(start uint64, size uint64) (uint64, uint64)
|
// func Interval(start uint64, size uint64) (uint64, uint64)
|
||||||
TEXT ·Interval(SB), NOSPLIT, $0-32
|
TEXT ·Interval(SB), NOSPLIT, $0-32
|
||||||
MOVQ start(FP), AX
|
MOVQ start(FP), AX
|
||||||
MOVQ size+8(FP), CX
|
MOVQ size+8(FP), CX
|
||||||
ADDQ AX, CX
|
ADDQ AX, CX
|
||||||
MOVQ AX, ret+16(FP)
|
MOVQ AX, ret+16(FP)
|
||||||
MOVQ CX, ret1+24(FP)
|
MOVQ CX, ret1+24(FP)
|
||||||
RET
|
RET
|
||||||
|
|
||||||
// func Butterfly(x0 float64, x1 float64) (y0 float64, y1 float64)
|
// func Butterfly(x0 float64, x1 float64) (y0 float64, y1 float64)
|
||||||
TEXT ·Butterfly(SB), NOSPLIT, $0-32
|
TEXT ·Butterfly(SB), NOSPLIT, $0-32
|
||||||
MOVSD x0(FP), X0
|
MOVSD x0(FP), X0
|
||||||
MOVSD x1+8(FP), X1
|
MOVSD x1+8(FP), X1
|
||||||
MOVSD X0, X2
|
MOVSD X0, X2
|
||||||
ADDSD X1, X2
|
ADDSD X1, X2
|
||||||
MOVSD X0, X3
|
MOVSD X0, X3
|
||||||
SUBSD X1, X3
|
SUBSD X1, X3
|
||||||
MOVSD X2, y0+16(FP)
|
MOVSD X2, y0+16(FP)
|
||||||
MOVSD X3, y1+24(FP)
|
MOVSD X3, y1+24(FP)
|
||||||
RET
|
RET
|
||||||
|
|
||||||
// func Septuple(byte) [7]byte
|
// func Septuple(byte) [7]byte
|
||||||
TEXT ·Septuple(SB), NOSPLIT, $0-15
|
TEXT ·Septuple(SB), NOSPLIT, $0-15
|
||||||
MOVB arg(FP), AL
|
MOVB arg(FP), AL
|
||||||
MOVB AL, ret_0+8(FP)
|
MOVB AL, ret_0+8(FP)
|
||||||
MOVB AL, ret_1+9(FP)
|
MOVB AL, ret_1+9(FP)
|
||||||
MOVB AL, ret_2+10(FP)
|
MOVB AL, ret_2+10(FP)
|
||||||
MOVB AL, ret_3+11(FP)
|
MOVB AL, ret_3+11(FP)
|
||||||
MOVB AL, ret_4+12(FP)
|
MOVB AL, ret_4+12(FP)
|
||||||
MOVB AL, ret_5+13(FP)
|
MOVB AL, ret_5+13(FP)
|
||||||
MOVB AL, ret_6+14(FP)
|
MOVB AL, ret_6+14(FP)
|
||||||
RET
|
RET
|
||||||
|
|
||||||
// func CriticalLine(t float64) complex128
|
// func CriticalLine(t float64) complex128
|
||||||
TEXT ·CriticalLine(SB), NOSPLIT, $0-24
|
TEXT ·CriticalLine(SB), NOSPLIT, $0-24
|
||||||
MOVSD t(FP), X0
|
MOVSD t(FP), X0
|
||||||
MOVSD half<>(SB), X1
|
MOVSD half<>(SB), X1
|
||||||
MOVSD X1, ret_real+8(FP)
|
MOVSD X1, ret_real+8(FP)
|
||||||
MOVSD X0, ret_imag+16(FP)
|
MOVSD X0, ret_imag+16(FP)
|
||||||
RET
|
RET
|
||||||
|
|
||||||
DATA half<>(SB)/8, $(0.5)
|
DATA half<>(SB)/8, $(0.5)
|
||||||
@@ -48,12 +48,12 @@ GLOBL half<>(SB), RODATA|NOPTR, $8
|
|||||||
|
|
||||||
// func NewStruct(w uint16, p [2]float64, q uint64) Struct
|
// func NewStruct(w uint16, p [2]float64, q uint64) Struct
|
||||||
TEXT ·NewStruct(SB), NOSPLIT, $0-64
|
TEXT ·NewStruct(SB), NOSPLIT, $0-64
|
||||||
MOVW w(FP), AX
|
MOVW w(FP), AX
|
||||||
MOVSD p_0+8(FP), X0
|
MOVSD p_0+8(FP), X0
|
||||||
MOVSD p_1+16(FP), X1
|
MOVSD p_1+16(FP), X1
|
||||||
MOVQ q+24(FP), CX
|
MOVQ q+24(FP), CX
|
||||||
MOVW AX, ret_Word+32(FP)
|
MOVW AX, ret_Word+32(FP)
|
||||||
MOVSD X0, ret_Point_0+40(FP)
|
MOVSD X0, ret_Point_0+40(FP)
|
||||||
MOVSD X1, ret_Point_1+48(FP)
|
MOVSD X1, ret_Point_1+48(FP)
|
||||||
MOVQ CX, ret_Quad+56(FP)
|
MOVQ CX, ret_Quad+56(FP)
|
||||||
RET
|
RET
|
||||||
|
|||||||
2668
examples/sha1/sha1.s
2668
examples/sha1/sha1.s
File diff suppressed because it is too large
Load Diff
@@ -4,307 +4,335 @@
|
|||||||
|
|
||||||
// func Hash(state *State, key []byte) uint64
|
// func Hash(state *State, key []byte) uint64
|
||||||
TEXT ·Hash(SB), NOSPLIT, $0-40
|
TEXT ·Hash(SB), NOSPLIT, $0-40
|
||||||
MOVQ state(FP), AX
|
MOVQ state(FP), AX
|
||||||
MOVQ key_base+8(FP), CX
|
MOVQ key_base+8(FP), CX
|
||||||
MOVQ key_len+16(FP), DX
|
MOVQ key_len+16(FP), DX
|
||||||
MOVQ (AX), BX
|
MOVQ (AX), BX
|
||||||
MOVQ 8(AX), BP
|
MOVQ 8(AX), BP
|
||||||
MOVQ DX, SI
|
MOVQ DX, SI
|
||||||
ADDQ $0x00000001, SI
|
ADDQ $0x00000001, SI
|
||||||
MOVQ $0xb89b0f8e1655514f, DI
|
MOVQ $0xb89b0f8e1655514f, DI
|
||||||
IMULQ DI, SI
|
IMULQ DI, SI
|
||||||
XORQ SI, BX
|
XORQ SI, BX
|
||||||
MOVQ DX, SI
|
MOVQ DX, SI
|
||||||
ADDQ $0x00000002, SI
|
ADDQ $0x00000002, SI
|
||||||
MOVQ $0x8c6f736011bd5127, DI
|
MOVQ $0x8c6f736011bd5127, DI
|
||||||
IMULQ DI, SI
|
IMULQ DI, SI
|
||||||
XORQ SI, BP
|
XORQ SI, BP
|
||||||
CMPQ DX, $0x00000020
|
CMPQ DX, $0x00000020
|
||||||
JGE coreLong
|
JGE coreLong
|
||||||
MOVQ DX, SI
|
MOVQ DX, SI
|
||||||
SHRQ $0x03, SI
|
SHRQ $0x03, SI
|
||||||
CMPQ SI, $0x00000000
|
CMPQ SI, $0x00000000
|
||||||
JE shortCore0
|
JE shortCore0
|
||||||
CMPQ SI, $0x00000001
|
CMPQ SI, $0x00000001
|
||||||
JE shortCore1
|
JE shortCore1
|
||||||
CMPQ SI, $0x00000002
|
CMPQ SI, $0x00000002
|
||||||
JE shortCore2
|
JE shortCore2
|
||||||
CMPQ SI, $0x00000003
|
CMPQ SI, $0x00000003
|
||||||
JE shortCore3
|
JE shortCore3
|
||||||
|
|
||||||
shortCore3:
|
shortCore3:
|
||||||
MOVQ (CX), SI
|
MOVQ (CX), SI
|
||||||
MOVQ $0x9c1b8e1e9628323f, DI
|
MOVQ $0x9c1b8e1e9628323f, DI
|
||||||
IMULQ DI, SI
|
IMULQ DI, SI
|
||||||
ADDQ SI, BX
|
ADDQ SI, BX
|
||||||
RORQ $0x11, BX
|
RORQ $0x11, BX
|
||||||
XORQ BP, BX
|
XORQ BP, BX
|
||||||
RORQ $0x35, BP
|
RORQ $0x35, BP
|
||||||
ADDQ BX, BP
|
ADDQ BX, BP
|
||||||
ADDQ $0x00000008, CX
|
ADDQ $0x00000008, CX
|
||||||
SUBQ $0x00000008, DX
|
SUBQ $0x00000008, DX
|
||||||
|
|
||||||
shortCore2:
|
shortCore2:
|
||||||
MOVQ (CX), SI
|
MOVQ (CX), SI
|
||||||
MOVQ $0x9c1b8e1e9628323f, DI
|
MOVQ $0x9c1b8e1e9628323f, DI
|
||||||
IMULQ DI, SI
|
IMULQ DI, SI
|
||||||
ADDQ SI, BX
|
ADDQ SI, BX
|
||||||
RORQ $0x11, BX
|
RORQ $0x11, BX
|
||||||
XORQ BP, BX
|
XORQ BP, BX
|
||||||
RORQ $0x35, BP
|
RORQ $0x35, BP
|
||||||
ADDQ BX, BP
|
ADDQ BX, BP
|
||||||
ADDQ $0x00000008, CX
|
ADDQ $0x00000008, CX
|
||||||
SUBQ $0x00000008, DX
|
SUBQ $0x00000008, DX
|
||||||
|
|
||||||
shortCore1:
|
shortCore1:
|
||||||
MOVQ (CX), SI
|
MOVQ (CX), SI
|
||||||
MOVQ $0x9c1b8e1e9628323f, DI
|
MOVQ $0x9c1b8e1e9628323f, DI
|
||||||
IMULQ DI, SI
|
IMULQ DI, SI
|
||||||
ADDQ SI, BX
|
ADDQ SI, BX
|
||||||
RORQ $0x11, BX
|
RORQ $0x11, BX
|
||||||
XORQ BP, BX
|
XORQ BP, BX
|
||||||
RORQ $0x35, BP
|
RORQ $0x35, BP
|
||||||
ADDQ BX, BP
|
ADDQ BX, BP
|
||||||
ADDQ $0x00000008, CX
|
ADDQ $0x00000008, CX
|
||||||
SUBQ $0x00000008, DX
|
SUBQ $0x00000008, DX
|
||||||
|
|
||||||
shortCore0:
|
shortCore0:
|
||||||
CMPQ DX, $0x00000000
|
CMPQ DX, $0x00000000
|
||||||
JE shortTail0
|
JE shortTail0
|
||||||
CMPQ DX, $0x00000001
|
CMPQ DX, $0x00000001
|
||||||
JE shortTail1
|
JE shortTail1
|
||||||
CMPQ DX, $0x00000002
|
CMPQ DX, $0x00000002
|
||||||
JE shortTail2
|
JE shortTail2
|
||||||
CMPQ DX, $0x00000003
|
CMPQ DX, $0x00000003
|
||||||
JE shortTail3
|
JE shortTail3
|
||||||
CMPQ DX, $0x00000004
|
CMPQ DX, $0x00000004
|
||||||
JE shortTail4
|
JE shortTail4
|
||||||
CMPQ DX, $0x00000005
|
CMPQ DX, $0x00000005
|
||||||
JE shortTail5
|
JE shortTail5
|
||||||
CMPQ DX, $0x00000006
|
CMPQ DX, $0x00000006
|
||||||
JE shortTail6
|
JE shortTail6
|
||||||
CMPQ DX, $0x00000007
|
CMPQ DX, $0x00000007
|
||||||
JE shortTail7
|
JE shortTail7
|
||||||
|
|
||||||
shortTail7:
|
shortTail7:
|
||||||
MOVBQZX 6(CX), SI
|
MOVBQZX 6(CX), SI
|
||||||
SHLQ $0x20, SI
|
SHLQ $0x20, SI
|
||||||
ADDQ SI, BX
|
ADDQ SI, BX
|
||||||
|
|
||||||
shortTail6:
|
shortTail6:
|
||||||
MOVBQZX 5(CX), SI
|
MOVBQZX 5(CX), SI
|
||||||
SHLQ $0x30, SI
|
SHLQ $0x30, SI
|
||||||
ADDQ SI, BP
|
ADDQ SI, BP
|
||||||
|
|
||||||
shortTail5:
|
shortTail5:
|
||||||
MOVBQZX 4(CX), SI
|
MOVBQZX 4(CX), SI
|
||||||
SHLQ $0x10, SI
|
SHLQ $0x10, SI
|
||||||
ADDQ SI, BX
|
ADDQ SI, BX
|
||||||
|
|
||||||
shortTail4:
|
shortTail4:
|
||||||
MOVLQZX (CX), SI
|
MOVLQZX (CX), SI
|
||||||
ADDQ SI, BP
|
ADDQ SI, BP
|
||||||
JMP shortAfter
|
JMP shortAfter
|
||||||
|
|
||||||
shortTail3:
|
shortTail3:
|
||||||
MOVBQZX 2(CX), SI
|
MOVBQZX 2(CX), SI
|
||||||
SHLQ $0x30, SI
|
SHLQ $0x30, SI
|
||||||
ADDQ SI, BX
|
ADDQ SI, BX
|
||||||
|
|
||||||
shortTail2:
|
shortTail2:
|
||||||
MOVWQZX (CX), SI
|
MOVWQZX (CX), SI
|
||||||
ADDQ SI, BP
|
ADDQ SI, BP
|
||||||
JMP shortAfter
|
JMP shortAfter
|
||||||
|
|
||||||
shortTail1:
|
shortTail1:
|
||||||
MOVBQZX (CX), SI
|
MOVBQZX (CX), SI
|
||||||
ADDQ SI, BX
|
ADDQ SI, BX
|
||||||
|
|
||||||
shortTail0:
|
shortTail0:
|
||||||
RORQ $0x20, BP
|
RORQ $0x20, BP
|
||||||
XORQ $0x000000ff, BP
|
XORQ $0x000000ff, BP
|
||||||
|
|
||||||
shortAfter:
|
shortAfter:
|
||||||
XORQ BX, BP
|
XORQ BX, BP
|
||||||
RORQ $0x21, BX
|
RORQ $0x21, BX
|
||||||
ADDQ BP, BX
|
ADDQ BP, BX
|
||||||
ROLQ $0x11, BP
|
ROLQ $0x11, BP
|
||||||
XORQ BX, BP
|
XORQ BX, BP
|
||||||
ROLQ $0x2b, BX
|
ROLQ $0x2b, BX
|
||||||
ADDQ BP, BX
|
ADDQ BP, BX
|
||||||
ROLQ $0x1f, BP
|
ROLQ $0x1f, BP
|
||||||
SUBQ BX, BP
|
SUBQ BX, BP
|
||||||
ROLQ $0x0d, BX
|
ROLQ $0x0d, BX
|
||||||
XORQ BP, BX
|
XORQ BP, BX
|
||||||
SUBQ BX, BP
|
SUBQ BX, BP
|
||||||
ROLQ $0x29, BX
|
ROLQ $0x29, BX
|
||||||
ADDQ BP, BX
|
ADDQ BP, BX
|
||||||
ROLQ $0x25, BP
|
ROLQ $0x25, BP
|
||||||
XORQ BX, BP
|
XORQ BX, BP
|
||||||
RORQ $0x27, BX
|
RORQ $0x27, BX
|
||||||
ADDQ BP, BX
|
ADDQ BP, BX
|
||||||
RORQ $0x0f, BP
|
RORQ $0x0f, BP
|
||||||
ADDQ BX, BP
|
ADDQ BX, BP
|
||||||
ROLQ $0x0f, BX
|
ROLQ $0x0f, BX
|
||||||
XORQ BP, BX
|
XORQ BP, BX
|
||||||
RORQ $0x05, BP
|
RORQ $0x05, BP
|
||||||
XORQ BP, BX
|
XORQ BP, BX
|
||||||
MOVQ BX, ret+32(FP)
|
MOVQ BX, ret+32(FP)
|
||||||
RET
|
RET
|
||||||
|
|
||||||
coreLong:
|
coreLong:
|
||||||
MOVQ 16(AX), DI
|
MOVQ 16(AX), DI
|
||||||
MOVQ 24(AX), AX
|
MOVQ 24(AX), AX
|
||||||
MOVQ DX, SI
|
MOVQ DX, SI
|
||||||
ADDQ $0x00000003, SI
|
ADDQ $0x00000003, SI
|
||||||
MOVQ $0x8f29bd94edce7b39, R8
|
MOVQ $0x8f29bd94edce7b39, R8
|
||||||
IMULQ R8, SI
|
IMULQ R8, SI
|
||||||
XORQ SI, DI
|
XORQ SI, DI
|
||||||
MOVQ DX, SI
|
MOVQ DX, SI
|
||||||
ADDQ $0x00000004, SI
|
ADDQ $0x00000004, SI
|
||||||
MOVQ $0x9c1b8e1e9628323f, R8
|
MOVQ $0x9c1b8e1e9628323f, R8
|
||||||
IMULQ R8, SI
|
IMULQ R8, SI
|
||||||
XORQ SI, AX
|
XORQ SI, AX
|
||||||
|
|
||||||
block:
|
block:
|
||||||
MOVQ (CX), SI
|
MOVQ (CX), SI
|
||||||
MOVQ $0x00000000802910e3, R8
|
MOVQ $0x00000000802910e3, R8
|
||||||
IMULQ R8, SI
|
IMULQ R8, SI
|
||||||
ADDQ SI, BX
|
ADDQ SI, BX
|
||||||
ROLQ $0x39, BX
|
ROLQ $0x39, BX
|
||||||
XORQ AX, BX
|
XORQ AX, BX
|
||||||
MOVQ 8(CX), SI
|
MOVQ 8(CX), SI
|
||||||
MOVQ $0x00000000819b13af, R8
|
MOVQ $0x00000000819b13af, R8
|
||||||
IMULQ R8, SI
|
IMULQ R8, SI
|
||||||
ADDQ SI, BP
|
ADDQ SI, BP
|
||||||
ROLQ $0x3f, BP
|
ROLQ $0x3f, BP
|
||||||
XORQ DI, BP
|
XORQ DI, BP
|
||||||
MOVQ 16(CX), SI
|
MOVQ 16(CX), SI
|
||||||
MOVQ $0x0000000091cb27e5, R8
|
MOVQ $0x0000000091cb27e5, R8
|
||||||
IMULQ R8, SI
|
IMULQ R8, SI
|
||||||
ADDQ SI, DI
|
ADDQ SI, DI
|
||||||
RORQ $0x2f, DI
|
RORQ $0x2f, DI
|
||||||
ADDQ BX, DI
|
ADDQ BX, DI
|
||||||
MOVQ 24(CX), SI
|
MOVQ 24(CX), SI
|
||||||
MOVQ $0x00000000c1a269c1, R8
|
MOVQ $0x00000000c1a269c1, R8
|
||||||
IMULQ R8, SI
|
IMULQ R8, SI
|
||||||
ADDQ SI, AX
|
ADDQ SI, AX
|
||||||
RORQ $0x0b, AX
|
RORQ $0x0b, AX
|
||||||
SUBQ BP, AX
|
SUBQ BP, AX
|
||||||
ADDQ $0x00000020, CX
|
ADDQ $0x00000020, CX
|
||||||
SUBQ $0x00000020, DX
|
SUBQ $0x00000020, DX
|
||||||
CMPQ DX, $0x00000020
|
CMPQ DX, $0x00000020
|
||||||
JGE block
|
JGE block
|
||||||
MOVQ DX, R8
|
MOVQ DX, R8
|
||||||
MOVQ DX, SI
|
MOVQ DX, SI
|
||||||
SHRQ $0x03, SI
|
SHRQ $0x03, SI
|
||||||
CMPQ SI, $0x00000000
|
CMPQ SI, $0x00000000
|
||||||
JE longCore0
|
JE longCore0
|
||||||
CMPQ SI, $0x00000001
|
CMPQ SI, $0x00000001
|
||||||
JE longCore1
|
JE longCore1
|
||||||
CMPQ SI, $0x00000002
|
CMPQ SI, $0x00000002
|
||||||
JE longCore2
|
JE longCore2
|
||||||
CMPQ SI, $0x00000003
|
CMPQ SI, $0x00000003
|
||||||
JE longCore3
|
JE longCore3
|
||||||
|
|
||||||
longCore3:
|
longCore3:
|
||||||
MOVQ (CX), SI
|
MOVQ (CX), SI
|
||||||
MOVQ $0x00000000802910e3, R9
|
MOVQ $0x00000000802910e3, R9
|
||||||
IMULQ R9, SI
|
IMULQ R9, SI
|
||||||
ADDQ SI, BX
|
ADDQ SI, BX
|
||||||
ROLQ $0x39, BX
|
ROLQ $0x39, BX
|
||||||
XORQ AX, BX
|
XORQ AX, BX
|
||||||
ADDQ $0x00000008, CX
|
ADDQ $0x00000008, CX
|
||||||
SUBQ $0x00000008, DX
|
SUBQ $0x00000008, DX
|
||||||
|
|
||||||
longCore2:
|
longCore2:
|
||||||
MOVQ (CX), SI
|
MOVQ (CX), SI
|
||||||
MOVQ $0x00000000819b13af, R9
|
MOVQ $0x00000000819b13af, R9
|
||||||
IMULQ R9, SI
|
IMULQ R9, SI
|
||||||
ADDQ SI, BP
|
ADDQ SI, BP
|
||||||
ROLQ $0x3f, BP
|
ROLQ $0x3f, BP
|
||||||
XORQ DI, BP
|
XORQ DI, BP
|
||||||
ADDQ $0x00000008, CX
|
ADDQ $0x00000008, CX
|
||||||
SUBQ $0x00000008, DX
|
SUBQ $0x00000008, DX
|
||||||
|
|
||||||
longCore1:
|
longCore1:
|
||||||
MOVQ (CX), SI
|
MOVQ (CX), SI
|
||||||
MOVQ $0x0000000091cb27e5, R9
|
MOVQ $0x0000000091cb27e5, R9
|
||||||
IMULQ R9, SI
|
IMULQ R9, SI
|
||||||
ADDQ SI, DI
|
ADDQ SI, DI
|
||||||
RORQ $0x2f, DI
|
RORQ $0x2f, DI
|
||||||
ADDQ BX, DI
|
ADDQ BX, DI
|
||||||
ADDQ $0x00000008, CX
|
ADDQ $0x00000008, CX
|
||||||
SUBQ $0x00000008, DX
|
SUBQ $0x00000008, DX
|
||||||
|
|
||||||
longCore0:
|
longCore0:
|
||||||
RORQ $0x0b, AX
|
RORQ $0x0b, AX
|
||||||
SUBQ BP, AX
|
SUBQ BP, AX
|
||||||
ADDQ $0x00000001, R8
|
ADDQ $0x00000001, R8
|
||||||
MOVQ $0x9c1b8e1e9628323f, SI
|
MOVQ $0x9c1b8e1e9628323f, SI
|
||||||
IMULQ SI, R8
|
IMULQ SI, R8
|
||||||
XORQ R8, BX
|
XORQ R8, BX
|
||||||
CMPQ DX, $0x00000000
|
CMPQ DX, $0x00000000
|
||||||
JE longTail0
|
JE longTail0
|
||||||
CMPQ DX, $0x00000001
|
CMPQ DX, $0x00000001
|
||||||
JE longTail1
|
JE longTail1
|
||||||
CMPQ DX, $0x00000002
|
CMPQ DX, $0x00000002
|
||||||
JE longTail2
|
JE longTail2
|
||||||
CMPQ DX, $0x00000003
|
CMPQ DX, $0x00000003
|
||||||
JE longTail3
|
JE longTail3
|
||||||
CMPQ DX, $0x00000004
|
CMPQ DX, $0x00000004
|
||||||
JE longTail4
|
JE longTail4
|
||||||
CMPQ DX, $0x00000005
|
CMPQ DX, $0x00000005
|
||||||
JE longTail5
|
JE longTail5
|
||||||
CMPQ DX, $0x00000006
|
CMPQ DX, $0x00000006
|
||||||
JE longTail6
|
JE longTail6
|
||||||
CMPQ DX, $0x00000007
|
CMPQ DX, $0x00000007
|
||||||
JE longTail7
|
JE longTail7
|
||||||
|
|
||||||
longTail7:
|
longTail7:
|
||||||
MOVBQZX 6(CX), SI
|
MOVBQZX 6(CX), SI
|
||||||
ADDQ SI, BP
|
ADDQ SI, BP
|
||||||
|
|
||||||
longTail6:
|
longTail6:
|
||||||
MOVWQZX 4(CX), SI
|
MOVWQZX 4(CX), SI
|
||||||
ADDQ SI, DI
|
ADDQ SI, DI
|
||||||
MOVLQZX (CX), SI
|
MOVLQZX (CX), SI
|
||||||
ADDQ SI, AX
|
ADDQ SI, AX
|
||||||
JMP longAfter
|
JMP longAfter
|
||||||
|
|
||||||
longTail5:
|
longTail5:
|
||||||
MOVBQZX 4(CX), SI
|
MOVBQZX 4(CX), SI
|
||||||
ADDQ SI, BP
|
ADDQ SI, BP
|
||||||
|
|
||||||
longTail4:
|
longTail4:
|
||||||
MOVLQZX (CX), SI
|
MOVLQZX (CX), SI
|
||||||
ADDQ SI, DI
|
ADDQ SI, DI
|
||||||
JMP longAfter
|
JMP longAfter
|
||||||
|
|
||||||
longTail3:
|
longTail3:
|
||||||
MOVBQZX 2(CX), SI
|
MOVBQZX 2(CX), SI
|
||||||
ADDQ SI, AX
|
ADDQ SI, AX
|
||||||
|
|
||||||
longTail2:
|
longTail2:
|
||||||
MOVWQZX (CX), SI
|
MOVWQZX (CX), SI
|
||||||
ADDQ SI, BP
|
ADDQ SI, BP
|
||||||
JMP longAfter
|
JMP longAfter
|
||||||
|
|
||||||
longTail1:
|
longTail1:
|
||||||
MOVBQZX (CX), SI
|
MOVBQZX (CX), SI
|
||||||
ADDQ SI, DI
|
ADDQ SI, DI
|
||||||
|
|
||||||
longTail0:
|
longTail0:
|
||||||
ROLQ $0x20, AX
|
ROLQ $0x20, AX
|
||||||
XORQ $0x000000ff, AX
|
XORQ $0x000000ff, AX
|
||||||
|
|
||||||
longAfter:
|
longAfter:
|
||||||
SUBQ DI, BP
|
SUBQ DI, BP
|
||||||
RORQ $0x13, BX
|
RORQ $0x13, BX
|
||||||
SUBQ BX, BP
|
SUBQ BX, BP
|
||||||
RORQ $0x35, BP
|
RORQ $0x35, BP
|
||||||
XORQ BP, AX
|
XORQ BP, AX
|
||||||
SUBQ AX, BX
|
SUBQ AX, BX
|
||||||
ROLQ $0x2b, AX
|
ROLQ $0x2b, AX
|
||||||
ADDQ AX, BX
|
ADDQ AX, BX
|
||||||
RORQ $0x03, BX
|
RORQ $0x03, BX
|
||||||
SUBQ BX, AX
|
SUBQ BX, AX
|
||||||
RORQ $0x2b, DI
|
RORQ $0x2b, DI
|
||||||
SUBQ AX, DI
|
SUBQ AX, DI
|
||||||
ROLQ $0x37, DI
|
ROLQ $0x37, DI
|
||||||
XORQ BX, DI
|
XORQ BX, DI
|
||||||
SUBQ DI, BP
|
SUBQ DI, BP
|
||||||
RORQ $0x07, AX
|
RORQ $0x07, AX
|
||||||
SUBQ DI, AX
|
SUBQ DI, AX
|
||||||
RORQ $0x1f, DI
|
RORQ $0x1f, DI
|
||||||
ADDQ DI, AX
|
ADDQ DI, AX
|
||||||
SUBQ BP, DI
|
SUBQ BP, DI
|
||||||
RORQ $0x27, AX
|
RORQ $0x27, AX
|
||||||
XORQ AX, DI
|
XORQ AX, DI
|
||||||
RORQ $0x11, AX
|
RORQ $0x11, AX
|
||||||
XORQ DI, AX
|
XORQ DI, AX
|
||||||
ADDQ AX, BP
|
ADDQ AX, BP
|
||||||
RORQ $0x09, BP
|
RORQ $0x09, BP
|
||||||
XORQ BP, DI
|
XORQ BP, DI
|
||||||
ROLQ $0x18, DI
|
ROLQ $0x18, DI
|
||||||
XORQ DI, AX
|
XORQ DI, AX
|
||||||
RORQ $0x3b, AX
|
RORQ $0x3b, AX
|
||||||
RORQ $0x01, BX
|
RORQ $0x01, BX
|
||||||
SUBQ BP, BX
|
SUBQ BP, BX
|
||||||
XORQ BP, BX
|
XORQ BP, BX
|
||||||
XORQ AX, DI
|
XORQ AX, DI
|
||||||
XORQ DI, BX
|
XORQ DI, BX
|
||||||
MOVQ BX, ret+32(FP)
|
MOVQ BX, ret+32(FP)
|
||||||
RET
|
RET
|
||||||
|
|||||||
@@ -4,16 +4,18 @@
|
|||||||
|
|
||||||
// func Sum(xs []uint64) uint64
|
// func Sum(xs []uint64) uint64
|
||||||
TEXT ·Sum(SB), NOSPLIT, $0-32
|
TEXT ·Sum(SB), NOSPLIT, $0-32
|
||||||
MOVQ xs_base(FP), AX
|
MOVQ xs_base(FP), AX
|
||||||
MOVQ xs_len+8(FP), CX
|
MOVQ xs_len+8(FP), CX
|
||||||
XORQ DX, DX
|
XORQ DX, DX
|
||||||
|
|
||||||
loop:
|
loop:
|
||||||
CMPQ CX, $0x00
|
CMPQ CX, $0x00
|
||||||
JE done
|
JE done
|
||||||
ADDQ (AX), DX
|
ADDQ (AX), DX
|
||||||
ADDQ $0x08, AX
|
ADDQ $0x08, AX
|
||||||
DECQ CX
|
DECQ CX
|
||||||
JMP loop
|
JMP loop
|
||||||
|
|
||||||
done:
|
done:
|
||||||
MOVQ DX, ret+24(FP)
|
MOVQ DX, ret+24(FP)
|
||||||
RET
|
RET
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ package prnt
|
|||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Generator provides convenience methods for code generators. In particular it
|
// Generator provides convenience methods for code generators. In particular it
|
||||||
@@ -15,6 +16,11 @@ type Generator struct {
|
|||||||
err error
|
err error
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Raw provides direct access to the underlying output stream.
|
||||||
|
func (g *Generator) Raw() io.Writer {
|
||||||
|
return &g.buf
|
||||||
|
}
|
||||||
|
|
||||||
// Printf prints to the internal buffer.
|
// Printf prints to the internal buffer.
|
||||||
func (g *Generator) Printf(format string, args ...interface{}) {
|
func (g *Generator) Printf(format string, args ...interface{}) {
|
||||||
if g.err != nil {
|
if g.err != nil {
|
||||||
|
|||||||
@@ -1,8 +1,10 @@
|
|||||||
package printer
|
package printer
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"fmt"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
"text/tabwriter"
|
||||||
|
|
||||||
"github.com/mmcloughlin/avo/internal/prnt"
|
"github.com/mmcloughlin/avo/internal/prnt"
|
||||||
"github.com/mmcloughlin/avo/ir"
|
"github.com/mmcloughlin/avo/ir"
|
||||||
@@ -81,20 +83,29 @@ func (p *goasm) function(f *ir.Function) {
|
|||||||
}
|
}
|
||||||
p.Printf(", %s\n", textsize(f))
|
p.Printf(", %s\n", textsize(f))
|
||||||
|
|
||||||
|
w := p.tabwriter()
|
||||||
for _, node := range f.Nodes {
|
for _, node := range f.Nodes {
|
||||||
switch n := node.(type) {
|
switch n := node.(type) {
|
||||||
case *ir.Instruction:
|
case *ir.Instruction:
|
||||||
|
leader := []byte{tabwriter.Escape, '\t', tabwriter.Escape}
|
||||||
|
fmt.Fprint(w, string(leader)+n.Opcode)
|
||||||
if len(n.Operands) > 0 {
|
if len(n.Operands) > 0 {
|
||||||
p.Printf("\t%s\t%s\n", n.Opcode, joinOperands(n.Operands))
|
fmt.Fprintf(w, "\t%s", joinOperands(n.Operands))
|
||||||
} else {
|
|
||||||
p.Printf("\t%s\n", n.Opcode)
|
|
||||||
}
|
}
|
||||||
|
fmt.Fprint(w, "\n")
|
||||||
case ir.Label:
|
case ir.Label:
|
||||||
p.Printf("%s:\n", n)
|
w.Flush()
|
||||||
|
w = p.tabwriter()
|
||||||
|
p.Printf("\n%s:\n", n)
|
||||||
default:
|
default:
|
||||||
panic("unexpected node type")
|
panic("unexpected node type")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
w.Flush()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *goasm) tabwriter() *tabwriter.Writer {
|
||||||
|
return tabwriter.NewWriter(p.Raw(), 4, 4, 1, ' ', tabwriter.StripEscape)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *goasm) global(g *ir.Global) {
|
func (p *goasm) global(g *ir.Global) {
|
||||||
|
|||||||
@@ -24,10 +24,10 @@ func TestBasic(t *testing.T) {
|
|||||||
"",
|
"",
|
||||||
"// func add(x uint64, y uint64) uint64",
|
"// func add(x uint64, y uint64) uint64",
|
||||||
"TEXT ·add(SB), $0-24",
|
"TEXT ·add(SB), $0-24",
|
||||||
"\tMOVQ\tx(FP), AX",
|
"\tMOVQ x(FP), AX",
|
||||||
"\tMOVQ\ty+8(FP), R9",
|
"\tMOVQ y+8(FP), R9",
|
||||||
"\tADDQ\tAX, R9",
|
"\tADDQ AX, R9",
|
||||||
"\tMOVQ\tR9, ret+16(FP)",
|
"\tMOVQ R9, ret+16(FP)",
|
||||||
"\tRET",
|
"\tRET",
|
||||||
"",
|
"",
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -4,9 +4,9 @@
|
|||||||
|
|
||||||
// func Split(x uint64) (q uint64, l uint32, w uint16, b uint8)
|
// func Split(x uint64) (q uint64, l uint32, w uint16, b uint8)
|
||||||
TEXT ·Split(SB), NOSPLIT, $0-23
|
TEXT ·Split(SB), NOSPLIT, $0-23
|
||||||
MOVQ x(FP), AX
|
MOVQ x(FP), AX
|
||||||
MOVQ AX, q+8(FP)
|
MOVQ AX, q+8(FP)
|
||||||
MOVL AX, l+16(FP)
|
MOVL AX, l+16(FP)
|
||||||
MOVW AX, w+20(FP)
|
MOVW AX, w+20(FP)
|
||||||
MOVB AL, b+22(FP)
|
MOVB AL, b+22(FP)
|
||||||
RET
|
RET
|
||||||
|
|||||||
Reference in New Issue
Block a user