printer: use tabwriter to align instructions (#8)

This commit is contained in:
Michael McLoughlin
2019-01-10 21:21:41 -08:00
parent 0e253b3753
commit f77a2e3b9e
20 changed files with 1876 additions and 1821 deletions

View File

@@ -65,10 +65,10 @@ After running `go generate` the [`add.s`](examples/add/add.s) file will contain
// func Add(x uint64, y uint64) uint64 // func Add(x uint64, y uint64) uint64
TEXT ·Add(SB), NOSPLIT, $0-24 TEXT ·Add(SB), NOSPLIT, $0-24
MOVQ x(FP), AX MOVQ x(FP), AX
MOVQ y+8(FP), CX MOVQ y+8(FP), CX
ADDQ AX, CX ADDQ AX, CX
MOVQ CX, ret+16(FP) MOVQ CX, ret+16(FP)
RET RET
``` ```
@@ -137,18 +137,20 @@ The result from this code generator is:
// func Sum(xs []uint64) uint64 // func Sum(xs []uint64) uint64
TEXT ·Sum(SB), NOSPLIT, $0-32 TEXT ·Sum(SB), NOSPLIT, $0-32
MOVQ xs_base(FP), AX MOVQ xs_base(FP), AX
MOVQ xs_len+8(FP), CX MOVQ xs_len+8(FP), CX
XORQ DX, DX XORQ DX, DX
loop: loop:
CMPQ CX, $0x00 CMPQ CX, $0x00
JE done JE done
ADDQ (AX), DX ADDQ (AX), DX
ADDQ $0x08, AX ADDQ $0x08, AX
DECQ CX DECQ CX
JMP loop JMP loop
done: done:
MOVQ DX, ret+24(FP) MOVQ DX, ret+24(FP)
RET RET
``` ```

View File

@@ -43,9 +43,9 @@ This produces [`add.s`](add.s) as follows:
// func Add(x uint64, y uint64) uint64 // func Add(x uint64, y uint64) uint64
TEXT ·Add(SB), NOSPLIT, $0-24 TEXT ·Add(SB), NOSPLIT, $0-24
MOVQ x(FP), AX MOVQ x(FP), AX
MOVQ y+8(FP), CX MOVQ y+8(FP), CX
ADDQ AX, CX ADDQ AX, CX
MOVQ CX, ret+16(FP) MOVQ CX, ret+16(FP)
RET RET
``` ```

View File

@@ -4,8 +4,8 @@
// func Add(x uint64, y uint64) uint64 // func Add(x uint64, y uint64) uint64
TEXT ·Add(SB), NOSPLIT, $0-24 TEXT ·Add(SB), NOSPLIT, $0-24
MOVQ x(FP), AX MOVQ x(FP), AX
MOVQ y+8(FP), CX MOVQ y+8(FP), CX
ADDQ AX, CX ADDQ AX, CX
MOVQ CX, ret+16(FP) MOVQ CX, ret+16(FP)
RET RET

View File

@@ -20,8 +20,8 @@ This `avo` code will generate the following assembly. Note that parameter refere
```s ```s
// func Second(x int32, y int32) int32 // func Second(x int32, y int32) int32
TEXT ·Second(SB), NOSPLIT, $0-12 TEXT ·Second(SB), NOSPLIT, $0-12
MOVL y+4(FP), AX MOVL y+4(FP), AX
MOVL AX, ret+8(FP) MOVL AX, ret+8(FP)
RET RET
``` ```

View File

@@ -4,108 +4,108 @@
// func Second(x int32, y int32) int32 // func Second(x int32, y int32) int32
TEXT ·Second(SB), NOSPLIT, $0-12 TEXT ·Second(SB), NOSPLIT, $0-12
MOVL y+4(FP), AX MOVL y+4(FP), AX
MOVL AX, ret+8(FP) MOVL AX, ret+8(FP)
RET RET
// func StringLen(s string) int // func StringLen(s string) int
TEXT ·StringLen(SB), NOSPLIT, $0-24 TEXT ·StringLen(SB), NOSPLIT, $0-24
MOVQ s_len+8(FP), AX MOVQ s_len+8(FP), AX
MOVQ AX, ret+16(FP) MOVQ AX, ret+16(FP)
RET RET
// func SliceLen(s []int) int // func SliceLen(s []int) int
TEXT ·SliceLen(SB), NOSPLIT, $0-32 TEXT ·SliceLen(SB), NOSPLIT, $0-32
MOVQ s_len+8(FP), AX MOVQ s_len+8(FP), AX
MOVQ AX, ret+24(FP) MOVQ AX, ret+24(FP)
RET RET
// func SliceCap(s []int) int // func SliceCap(s []int) int
TEXT ·SliceCap(SB), NOSPLIT, $0-32 TEXT ·SliceCap(SB), NOSPLIT, $0-32
MOVQ s_cap+16(FP), AX MOVQ s_cap+16(FP), AX
MOVQ AX, ret+24(FP) MOVQ AX, ret+24(FP)
RET RET
// func ArrayThree(a [7]uint64) uint64 // func ArrayThree(a [7]uint64) uint64
TEXT ·ArrayThree(SB), NOSPLIT, $0-64 TEXT ·ArrayThree(SB), NOSPLIT, $0-64
MOVQ a_3+24(FP), AX MOVQ a_3+24(FP), AX
MOVQ AX, ret+56(FP) MOVQ AX, ret+56(FP)
RET RET
// func FieldByte(s Struct) byte // func FieldByte(s Struct) byte
TEXT ·FieldByte(SB), NOSPLIT, $0-177 TEXT ·FieldByte(SB), NOSPLIT, $0-177
MOVB s_Byte(FP), AL MOVB s_Byte(FP), AL
MOVB AL, ret+176(FP) MOVB AL, ret+176(FP)
RET RET
// func FieldInt8(s Struct) int8 // func FieldInt8(s Struct) int8
TEXT ·FieldInt8(SB), NOSPLIT, $0-177 TEXT ·FieldInt8(SB), NOSPLIT, $0-177
MOVB s_Int8+1(FP), AL MOVB s_Int8+1(FP), AL
MOVB AL, ret+176(FP) MOVB AL, ret+176(FP)
RET RET
// func FieldUint16(s Struct) uint16 // func FieldUint16(s Struct) uint16
TEXT ·FieldUint16(SB), NOSPLIT, $0-178 TEXT ·FieldUint16(SB), NOSPLIT, $0-178
MOVW s_Uint16+2(FP), AX MOVW s_Uint16+2(FP), AX
MOVW AX, ret+176(FP) MOVW AX, ret+176(FP)
RET RET
// func FieldInt32(s Struct) int32 // func FieldInt32(s Struct) int32
TEXT ·FieldInt32(SB), NOSPLIT, $0-180 TEXT ·FieldInt32(SB), NOSPLIT, $0-180
MOVL s_Int32+4(FP), AX MOVL s_Int32+4(FP), AX
MOVL AX, ret+176(FP) MOVL AX, ret+176(FP)
RET RET
// func FieldUint64(s Struct) uint64 // func FieldUint64(s Struct) uint64
TEXT ·FieldUint64(SB), NOSPLIT, $0-184 TEXT ·FieldUint64(SB), NOSPLIT, $0-184
MOVQ s_Uint64+8(FP), AX MOVQ s_Uint64+8(FP), AX
MOVQ AX, ret+176(FP) MOVQ AX, ret+176(FP)
RET RET
// func FieldFloat32(s Struct) float32 // func FieldFloat32(s Struct) float32
TEXT ·FieldFloat32(SB), NOSPLIT, $0-180 TEXT ·FieldFloat32(SB), NOSPLIT, $0-180
MOVSS s_Float32+16(FP), X0 MOVSS s_Float32+16(FP), X0
MOVSS X0, ret+176(FP) MOVSS X0, ret+176(FP)
RET RET
// func FieldFloat64(s Struct) float64 // func FieldFloat64(s Struct) float64
TEXT ·FieldFloat64(SB), NOSPLIT, $0-184 TEXT ·FieldFloat64(SB), NOSPLIT, $0-184
MOVSD s_Float64+24(FP), X0 MOVSD s_Float64+24(FP), X0
MOVSD X0, ret+176(FP) MOVSD X0, ret+176(FP)
RET RET
// func FieldStringLen(s Struct) int // func FieldStringLen(s Struct) int
TEXT ·FieldStringLen(SB), NOSPLIT, $0-184 TEXT ·FieldStringLen(SB), NOSPLIT, $0-184
MOVQ s_String_len+40(FP), AX MOVQ s_String_len+40(FP), AX
MOVQ AX, ret+176(FP) MOVQ AX, ret+176(FP)
RET RET
// func FieldSliceCap(s Struct) int // func FieldSliceCap(s Struct) int
TEXT ·FieldSliceCap(SB), NOSPLIT, $0-184 TEXT ·FieldSliceCap(SB), NOSPLIT, $0-184
MOVQ s_Slice_cap+64(FP), AX MOVQ s_Slice_cap+64(FP), AX
MOVQ AX, ret+176(FP) MOVQ AX, ret+176(FP)
RET RET
// func FieldArrayTwoBTwo(s Struct) byte // func FieldArrayTwoBTwo(s Struct) byte
TEXT ·FieldArrayTwoBTwo(SB), NOSPLIT, $0-177 TEXT ·FieldArrayTwoBTwo(SB), NOSPLIT, $0-177
MOVB s_Array_2_B_2+114(FP), AL MOVB s_Array_2_B_2+114(FP), AL
MOVB AL, ret+176(FP) MOVB AL, ret+176(FP)
RET RET
// func FieldArrayOneC(s Struct) uint16 // func FieldArrayOneC(s Struct) uint16
TEXT ·FieldArrayOneC(SB), NOSPLIT, $0-178 TEXT ·FieldArrayOneC(SB), NOSPLIT, $0-178
MOVW s_Array_1_C+100(FP), AX MOVW s_Array_1_C+100(FP), AX
MOVW AX, ret+176(FP) MOVW AX, ret+176(FP)
RET RET
// func FieldComplex64Imag(s Struct) float32 // func FieldComplex64Imag(s Struct) float32
TEXT ·FieldComplex64Imag(SB), NOSPLIT, $0-180 TEXT ·FieldComplex64Imag(SB), NOSPLIT, $0-180
MOVSS s_Complex64_imag+156(FP), X0 MOVSS s_Complex64_imag+156(FP), X0
MOVSS X0, ret+176(FP) MOVSS X0, ret+176(FP)
RET RET
// func FieldComplex128Real(s Struct) float64 // func FieldComplex128Real(s Struct) float64
TEXT ·FieldComplex128Real(SB), NOSPLIT, $0-184 TEXT ·FieldComplex128Real(SB), NOSPLIT, $0-184
MOVSD s_Complex128_real+160(FP), X0 MOVSD s_Complex128_real+160(FP), X0
MOVSD X0, ret+176(FP) MOVSD X0, ret+176(FP)
RET RET

BIN
examples/backup.tar.gz Normal file

Binary file not shown.

View File

@@ -25,12 +25,12 @@ Generated assembly:
```s ```s
// func Norm(z complex128) float64 // func Norm(z complex128) float64
TEXT ·Norm(SB), NOSPLIT, $0-24 TEXT ·Norm(SB), NOSPLIT, $0-24
MOVSD z_real(FP), X0 MOVSD z_real(FP), X0
MOVSD z_imag+8(FP), X1 MOVSD z_imag+8(FP), X1
MULSD X0, X0 MULSD X0, X0
MULSD X1, X1 MULSD X1, X1
ADDSD X1, X0 ADDSD X1, X0
SQRTSD X0, X2 SQRTSD X0, X2
MOVSD X2, ret+16(FP) MOVSD X2, ret+16(FP)
RET RET
``` ```

View File

@@ -4,23 +4,23 @@
// func Real(z complex128) float64 // func Real(z complex128) float64
TEXT ·Real(SB), NOSPLIT, $0-24 TEXT ·Real(SB), NOSPLIT, $0-24
MOVSD z_real(FP), X0 MOVSD z_real(FP), X0
MOVSD X0, ret+16(FP) MOVSD X0, ret+16(FP)
RET RET
// func Imag(z complex128) float64 // func Imag(z complex128) float64
TEXT ·Imag(SB), NOSPLIT, $0-24 TEXT ·Imag(SB), NOSPLIT, $0-24
MOVSD z_imag+8(FP), X0 MOVSD z_imag+8(FP), X0
MOVSD X0, ret+16(FP) MOVSD X0, ret+16(FP)
RET RET
// func Norm(z complex128) float64 // func Norm(z complex128) float64
TEXT ·Norm(SB), NOSPLIT, $0-24 TEXT ·Norm(SB), NOSPLIT, $0-24
MOVSD z_real(FP), X0 MOVSD z_real(FP), X0
MOVSD z_imag+8(FP), X1 MOVSD z_imag+8(FP), X1
MULSD X0, X0 MULSD X0, X0
MULSD X1, X1 MULSD X1, X1
ADDSD X1, X0 ADDSD X1, X0
SQRTSD X0, X2 SQRTSD X0, X2
MOVSD X2, ret+16(FP) MOVSD X2, ret+16(FP)
RET RET

View File

@@ -14,8 +14,8 @@ GLOBL bytes<>(SB), RODATA|NOPTR, $40
// func DataAt(i int) byte // func DataAt(i int) byte
TEXT ·DataAt(SB), NOSPLIT, $0-9 TEXT ·DataAt(SB), NOSPLIT, $0-9
MOVQ i(FP), AX MOVQ i(FP), AX
LEAQ bytes<>(SB), CX LEAQ bytes<>(SB), CX
MOVB (CX)(AX*1), AL MOVB (CX)(AX*1), AL
MOVB AL, ret+8(FP) MOVB AL, ret+8(FP)
RET RET

View File

@@ -4,55 +4,59 @@
// func Dot(x []float32, y []float32) float32 // func Dot(x []float32, y []float32) float32
TEXT ·Dot(SB), NOSPLIT, $0-52 TEXT ·Dot(SB), NOSPLIT, $0-52
MOVQ x_base(FP), AX MOVQ x_base(FP), AX
MOVQ y_base+24(FP), CX MOVQ y_base+24(FP), CX
MOVQ x_len+8(FP), DX MOVQ x_len+8(FP), DX
VXORPS Y0, Y0, Y0 VXORPS Y0, Y0, Y0
VXORPS Y1, Y1, Y1 VXORPS Y1, Y1, Y1
VXORPS Y2, Y2, Y2 VXORPS Y2, Y2, Y2
VXORPS Y3, Y3, Y3 VXORPS Y3, Y3, Y3
VXORPS Y4, Y4, Y4 VXORPS Y4, Y4, Y4
VXORPS Y5, Y5, Y5 VXORPS Y5, Y5, Y5
blockloop: blockloop:
CMPQ DX, $0x00000030 CMPQ DX, $0x00000030
JL tail JL tail
VMOVUPS (AX), Y6 VMOVUPS (AX), Y6
VMOVUPS 32(AX), Y7 VMOVUPS 32(AX), Y7
VMOVUPS 64(AX), Y8 VMOVUPS 64(AX), Y8
VMOVUPS 96(AX), Y9 VMOVUPS 96(AX), Y9
VMOVUPS 128(AX), Y10 VMOVUPS 128(AX), Y10
VMOVUPS 160(AX), Y11 VMOVUPS 160(AX), Y11
VFMADD231PS (CX), Y6, Y0 VFMADD231PS (CX), Y6, Y0
VFMADD231PS 32(CX), Y7, Y1 VFMADD231PS 32(CX), Y7, Y1
VFMADD231PS 64(CX), Y8, Y2 VFMADD231PS 64(CX), Y8, Y2
VFMADD231PS 96(CX), Y9, Y3 VFMADD231PS 96(CX), Y9, Y3
VFMADD231PS 128(CX), Y10, Y4 VFMADD231PS 128(CX), Y10, Y4
VFMADD231PS 160(CX), Y11, Y5 VFMADD231PS 160(CX), Y11, Y5
ADDQ $0x000000c0, AX ADDQ $0x000000c0, AX
ADDQ $0x000000c0, CX ADDQ $0x000000c0, CX
SUBQ $0x00000030, DX SUBQ $0x00000030, DX
JMP blockloop JMP blockloop
tail: tail:
VXORPS X12, X12, X12 VXORPS X12, X12, X12
tailloop: tailloop:
CMPQ DX, $0x00000000 CMPQ DX, $0x00000000
JE reduce JE reduce
VMOVSS (AX), X6 VMOVSS (AX), X6
VFMADD231SS (CX), X6, X12 VFMADD231SS (CX), X6, X12
ADDQ $0x00000004, AX ADDQ $0x00000004, AX
ADDQ $0x00000004, CX ADDQ $0x00000004, CX
DECQ DX DECQ DX
JMP tailloop JMP tailloop
reduce: reduce:
VADDPS Y0, Y1, Y0 VADDPS Y0, Y1, Y0
VADDPS Y0, Y2, Y0 VADDPS Y0, Y2, Y0
VADDPS Y0, Y3, Y0 VADDPS Y0, Y3, Y0
VADDPS Y0, Y4, Y0 VADDPS Y0, Y4, Y0
VADDPS Y0, Y5, Y0 VADDPS Y0, Y5, Y0
VEXTRACTF128 $0x01, Y0, X1 VEXTRACTF128 $0x01, Y0, X1
VADDPS X0, X1, X0 VADDPS X0, X1, X0
VADDPS X0, X12, X0 VADDPS X0, X12, X0
VHADDPS X0, X0, X0 VHADDPS X0, X0, X0
VHADDPS X0, X0, X0 VHADDPS X0, X0, X0
MOVSS X0, ret+48(FP) MOVSS X0, ret+48(FP)
RET RET

View File

@@ -4,19 +4,21 @@
// func Hash64(data []byte) uint64 // func Hash64(data []byte) uint64
TEXT ·Hash64(SB), NOSPLIT, $0-32 TEXT ·Hash64(SB), NOSPLIT, $0-32
MOVQ data_base(FP), CX MOVQ data_base(FP), CX
MOVQ data_len+8(FP), BX MOVQ data_len+8(FP), BX
MOVQ $0xcbf29ce484222325, AX MOVQ $0xcbf29ce484222325, AX
MOVQ $0x00000100000001b3, BP MOVQ $0x00000100000001b3, BP
loop: loop:
CMPQ BX, $0x00 CMPQ BX, $0x00
JE done JE done
MOVBQZX (CX), DX MOVBQZX (CX), DX
XORQ DX, AX XORQ DX, AX
MULQ BP MULQ BP
INCQ CX INCQ CX
DECQ BX DECQ BX
JMP loop JMP loop
done: done:
MOVQ AX, ret+24(FP) MOVQ AX, ret+24(FP)
RET RET

View File

@@ -4,21 +4,21 @@
// func EncodeInt(lat float64, lng float64) uint64 // func EncodeInt(lat float64, lng float64) uint64
TEXT ·EncodeInt(SB), NOSPLIT, $0-24 TEXT ·EncodeInt(SB), NOSPLIT, $0-24
MOVSD lat(FP), X0 MOVSD lat(FP), X0
MOVSD lng+8(FP), X1 MOVSD lng+8(FP), X1
MULSD reciprocal180<>(SB), X0 MULSD reciprocal180<>(SB), X0
ADDSD onepointfive<>(SB), X0 ADDSD onepointfive<>(SB), X0
MULSD reciprocal360<>(SB), X1 MULSD reciprocal360<>(SB), X1
ADDSD onepointfive<>(SB), X1 ADDSD onepointfive<>(SB), X1
MOVQ X0, CX MOVQ X0, CX
SHRQ $0x14, CX SHRQ $0x14, CX
MOVQ X1, AX MOVQ X1, AX
SHRQ $0x14, AX SHRQ $0x14, AX
PDEPQ mask<>(SB), CX, CX PDEPQ mask<>(SB), CX, CX
PDEPQ mask<>(SB), AX, AX PDEPQ mask<>(SB), AX, AX
SHLQ $0x01, AX SHLQ $0x01, AX
XORQ AX, CX XORQ AX, CX
MOVQ CX, ret+16(FP) MOVQ CX, ret+16(FP)
RET RET
DATA reciprocal180<>(SB)/8, $(0.005555555555555556) DATA reciprocal180<>(SB)/8, $(0.005555555555555556)

View File

@@ -4,43 +4,43 @@
// func Interval(start uint64, size uint64) (uint64, uint64) // func Interval(start uint64, size uint64) (uint64, uint64)
TEXT ·Interval(SB), NOSPLIT, $0-32 TEXT ·Interval(SB), NOSPLIT, $0-32
MOVQ start(FP), AX MOVQ start(FP), AX
MOVQ size+8(FP), CX MOVQ size+8(FP), CX
ADDQ AX, CX ADDQ AX, CX
MOVQ AX, ret+16(FP) MOVQ AX, ret+16(FP)
MOVQ CX, ret1+24(FP) MOVQ CX, ret1+24(FP)
RET RET
// func Butterfly(x0 float64, x1 float64) (y0 float64, y1 float64) // func Butterfly(x0 float64, x1 float64) (y0 float64, y1 float64)
TEXT ·Butterfly(SB), NOSPLIT, $0-32 TEXT ·Butterfly(SB), NOSPLIT, $0-32
MOVSD x0(FP), X0 MOVSD x0(FP), X0
MOVSD x1+8(FP), X1 MOVSD x1+8(FP), X1
MOVSD X0, X2 MOVSD X0, X2
ADDSD X1, X2 ADDSD X1, X2
MOVSD X0, X3 MOVSD X0, X3
SUBSD X1, X3 SUBSD X1, X3
MOVSD X2, y0+16(FP) MOVSD X2, y0+16(FP)
MOVSD X3, y1+24(FP) MOVSD X3, y1+24(FP)
RET RET
// func Septuple(byte) [7]byte // func Septuple(byte) [7]byte
TEXT ·Septuple(SB), NOSPLIT, $0-15 TEXT ·Septuple(SB), NOSPLIT, $0-15
MOVB arg(FP), AL MOVB arg(FP), AL
MOVB AL, ret_0+8(FP) MOVB AL, ret_0+8(FP)
MOVB AL, ret_1+9(FP) MOVB AL, ret_1+9(FP)
MOVB AL, ret_2+10(FP) MOVB AL, ret_2+10(FP)
MOVB AL, ret_3+11(FP) MOVB AL, ret_3+11(FP)
MOVB AL, ret_4+12(FP) MOVB AL, ret_4+12(FP)
MOVB AL, ret_5+13(FP) MOVB AL, ret_5+13(FP)
MOVB AL, ret_6+14(FP) MOVB AL, ret_6+14(FP)
RET RET
// func CriticalLine(t float64) complex128 // func CriticalLine(t float64) complex128
TEXT ·CriticalLine(SB), NOSPLIT, $0-24 TEXT ·CriticalLine(SB), NOSPLIT, $0-24
MOVSD t(FP), X0 MOVSD t(FP), X0
MOVSD half<>(SB), X1 MOVSD half<>(SB), X1
MOVSD X1, ret_real+8(FP) MOVSD X1, ret_real+8(FP)
MOVSD X0, ret_imag+16(FP) MOVSD X0, ret_imag+16(FP)
RET RET
DATA half<>(SB)/8, $(0.5) DATA half<>(SB)/8, $(0.5)
@@ -48,12 +48,12 @@ GLOBL half<>(SB), RODATA|NOPTR, $8
// func NewStruct(w uint16, p [2]float64, q uint64) Struct // func NewStruct(w uint16, p [2]float64, q uint64) Struct
TEXT ·NewStruct(SB), NOSPLIT, $0-64 TEXT ·NewStruct(SB), NOSPLIT, $0-64
MOVW w(FP), AX MOVW w(FP), AX
MOVSD p_0+8(FP), X0 MOVSD p_0+8(FP), X0
MOVSD p_1+16(FP), X1 MOVSD p_1+16(FP), X1
MOVQ q+24(FP), CX MOVQ q+24(FP), CX
MOVW AX, ret_Word+32(FP) MOVW AX, ret_Word+32(FP)
MOVSD X0, ret_Point_0+40(FP) MOVSD X0, ret_Point_0+40(FP)
MOVSD X1, ret_Point_1+48(FP) MOVSD X1, ret_Point_1+48(FP)
MOVQ CX, ret_Quad+56(FP) MOVQ CX, ret_Quad+56(FP)
RET RET

File diff suppressed because it is too large Load Diff

View File

@@ -4,307 +4,335 @@
// func Hash(state *State, key []byte) uint64 // func Hash(state *State, key []byte) uint64
TEXT ·Hash(SB), NOSPLIT, $0-40 TEXT ·Hash(SB), NOSPLIT, $0-40
MOVQ state(FP), AX MOVQ state(FP), AX
MOVQ key_base+8(FP), CX MOVQ key_base+8(FP), CX
MOVQ key_len+16(FP), DX MOVQ key_len+16(FP), DX
MOVQ (AX), BX MOVQ (AX), BX
MOVQ 8(AX), BP MOVQ 8(AX), BP
MOVQ DX, SI MOVQ DX, SI
ADDQ $0x00000001, SI ADDQ $0x00000001, SI
MOVQ $0xb89b0f8e1655514f, DI MOVQ $0xb89b0f8e1655514f, DI
IMULQ DI, SI IMULQ DI, SI
XORQ SI, BX XORQ SI, BX
MOVQ DX, SI MOVQ DX, SI
ADDQ $0x00000002, SI ADDQ $0x00000002, SI
MOVQ $0x8c6f736011bd5127, DI MOVQ $0x8c6f736011bd5127, DI
IMULQ DI, SI IMULQ DI, SI
XORQ SI, BP XORQ SI, BP
CMPQ DX, $0x00000020 CMPQ DX, $0x00000020
JGE coreLong JGE coreLong
MOVQ DX, SI MOVQ DX, SI
SHRQ $0x03, SI SHRQ $0x03, SI
CMPQ SI, $0x00000000 CMPQ SI, $0x00000000
JE shortCore0 JE shortCore0
CMPQ SI, $0x00000001 CMPQ SI, $0x00000001
JE shortCore1 JE shortCore1
CMPQ SI, $0x00000002 CMPQ SI, $0x00000002
JE shortCore2 JE shortCore2
CMPQ SI, $0x00000003 CMPQ SI, $0x00000003
JE shortCore3 JE shortCore3
shortCore3: shortCore3:
MOVQ (CX), SI MOVQ (CX), SI
MOVQ $0x9c1b8e1e9628323f, DI MOVQ $0x9c1b8e1e9628323f, DI
IMULQ DI, SI IMULQ DI, SI
ADDQ SI, BX ADDQ SI, BX
RORQ $0x11, BX RORQ $0x11, BX
XORQ BP, BX XORQ BP, BX
RORQ $0x35, BP RORQ $0x35, BP
ADDQ BX, BP ADDQ BX, BP
ADDQ $0x00000008, CX ADDQ $0x00000008, CX
SUBQ $0x00000008, DX SUBQ $0x00000008, DX
shortCore2: shortCore2:
MOVQ (CX), SI MOVQ (CX), SI
MOVQ $0x9c1b8e1e9628323f, DI MOVQ $0x9c1b8e1e9628323f, DI
IMULQ DI, SI IMULQ DI, SI
ADDQ SI, BX ADDQ SI, BX
RORQ $0x11, BX RORQ $0x11, BX
XORQ BP, BX XORQ BP, BX
RORQ $0x35, BP RORQ $0x35, BP
ADDQ BX, BP ADDQ BX, BP
ADDQ $0x00000008, CX ADDQ $0x00000008, CX
SUBQ $0x00000008, DX SUBQ $0x00000008, DX
shortCore1: shortCore1:
MOVQ (CX), SI MOVQ (CX), SI
MOVQ $0x9c1b8e1e9628323f, DI MOVQ $0x9c1b8e1e9628323f, DI
IMULQ DI, SI IMULQ DI, SI
ADDQ SI, BX ADDQ SI, BX
RORQ $0x11, BX RORQ $0x11, BX
XORQ BP, BX XORQ BP, BX
RORQ $0x35, BP RORQ $0x35, BP
ADDQ BX, BP ADDQ BX, BP
ADDQ $0x00000008, CX ADDQ $0x00000008, CX
SUBQ $0x00000008, DX SUBQ $0x00000008, DX
shortCore0: shortCore0:
CMPQ DX, $0x00000000 CMPQ DX, $0x00000000
JE shortTail0 JE shortTail0
CMPQ DX, $0x00000001 CMPQ DX, $0x00000001
JE shortTail1 JE shortTail1
CMPQ DX, $0x00000002 CMPQ DX, $0x00000002
JE shortTail2 JE shortTail2
CMPQ DX, $0x00000003 CMPQ DX, $0x00000003
JE shortTail3 JE shortTail3
CMPQ DX, $0x00000004 CMPQ DX, $0x00000004
JE shortTail4 JE shortTail4
CMPQ DX, $0x00000005 CMPQ DX, $0x00000005
JE shortTail5 JE shortTail5
CMPQ DX, $0x00000006 CMPQ DX, $0x00000006
JE shortTail6 JE shortTail6
CMPQ DX, $0x00000007 CMPQ DX, $0x00000007
JE shortTail7 JE shortTail7
shortTail7: shortTail7:
MOVBQZX 6(CX), SI MOVBQZX 6(CX), SI
SHLQ $0x20, SI SHLQ $0x20, SI
ADDQ SI, BX ADDQ SI, BX
shortTail6: shortTail6:
MOVBQZX 5(CX), SI MOVBQZX 5(CX), SI
SHLQ $0x30, SI SHLQ $0x30, SI
ADDQ SI, BP ADDQ SI, BP
shortTail5: shortTail5:
MOVBQZX 4(CX), SI MOVBQZX 4(CX), SI
SHLQ $0x10, SI SHLQ $0x10, SI
ADDQ SI, BX ADDQ SI, BX
shortTail4: shortTail4:
MOVLQZX (CX), SI MOVLQZX (CX), SI
ADDQ SI, BP ADDQ SI, BP
JMP shortAfter JMP shortAfter
shortTail3: shortTail3:
MOVBQZX 2(CX), SI MOVBQZX 2(CX), SI
SHLQ $0x30, SI SHLQ $0x30, SI
ADDQ SI, BX ADDQ SI, BX
shortTail2: shortTail2:
MOVWQZX (CX), SI MOVWQZX (CX), SI
ADDQ SI, BP ADDQ SI, BP
JMP shortAfter JMP shortAfter
shortTail1: shortTail1:
MOVBQZX (CX), SI MOVBQZX (CX), SI
ADDQ SI, BX ADDQ SI, BX
shortTail0: shortTail0:
RORQ $0x20, BP RORQ $0x20, BP
XORQ $0x000000ff, BP XORQ $0x000000ff, BP
shortAfter: shortAfter:
XORQ BX, BP XORQ BX, BP
RORQ $0x21, BX RORQ $0x21, BX
ADDQ BP, BX ADDQ BP, BX
ROLQ $0x11, BP ROLQ $0x11, BP
XORQ BX, BP XORQ BX, BP
ROLQ $0x2b, BX ROLQ $0x2b, BX
ADDQ BP, BX ADDQ BP, BX
ROLQ $0x1f, BP ROLQ $0x1f, BP
SUBQ BX, BP SUBQ BX, BP
ROLQ $0x0d, BX ROLQ $0x0d, BX
XORQ BP, BX XORQ BP, BX
SUBQ BX, BP SUBQ BX, BP
ROLQ $0x29, BX ROLQ $0x29, BX
ADDQ BP, BX ADDQ BP, BX
ROLQ $0x25, BP ROLQ $0x25, BP
XORQ BX, BP XORQ BX, BP
RORQ $0x27, BX RORQ $0x27, BX
ADDQ BP, BX ADDQ BP, BX
RORQ $0x0f, BP RORQ $0x0f, BP
ADDQ BX, BP ADDQ BX, BP
ROLQ $0x0f, BX ROLQ $0x0f, BX
XORQ BP, BX XORQ BP, BX
RORQ $0x05, BP RORQ $0x05, BP
XORQ BP, BX XORQ BP, BX
MOVQ BX, ret+32(FP) MOVQ BX, ret+32(FP)
RET RET
coreLong: coreLong:
MOVQ 16(AX), DI MOVQ 16(AX), DI
MOVQ 24(AX), AX MOVQ 24(AX), AX
MOVQ DX, SI MOVQ DX, SI
ADDQ $0x00000003, SI ADDQ $0x00000003, SI
MOVQ $0x8f29bd94edce7b39, R8 MOVQ $0x8f29bd94edce7b39, R8
IMULQ R8, SI IMULQ R8, SI
XORQ SI, DI XORQ SI, DI
MOVQ DX, SI MOVQ DX, SI
ADDQ $0x00000004, SI ADDQ $0x00000004, SI
MOVQ $0x9c1b8e1e9628323f, R8 MOVQ $0x9c1b8e1e9628323f, R8
IMULQ R8, SI IMULQ R8, SI
XORQ SI, AX XORQ SI, AX
block: block:
MOVQ (CX), SI MOVQ (CX), SI
MOVQ $0x00000000802910e3, R8 MOVQ $0x00000000802910e3, R8
IMULQ R8, SI IMULQ R8, SI
ADDQ SI, BX ADDQ SI, BX
ROLQ $0x39, BX ROLQ $0x39, BX
XORQ AX, BX XORQ AX, BX
MOVQ 8(CX), SI MOVQ 8(CX), SI
MOVQ $0x00000000819b13af, R8 MOVQ $0x00000000819b13af, R8
IMULQ R8, SI IMULQ R8, SI
ADDQ SI, BP ADDQ SI, BP
ROLQ $0x3f, BP ROLQ $0x3f, BP
XORQ DI, BP XORQ DI, BP
MOVQ 16(CX), SI MOVQ 16(CX), SI
MOVQ $0x0000000091cb27e5, R8 MOVQ $0x0000000091cb27e5, R8
IMULQ R8, SI IMULQ R8, SI
ADDQ SI, DI ADDQ SI, DI
RORQ $0x2f, DI RORQ $0x2f, DI
ADDQ BX, DI ADDQ BX, DI
MOVQ 24(CX), SI MOVQ 24(CX), SI
MOVQ $0x00000000c1a269c1, R8 MOVQ $0x00000000c1a269c1, R8
IMULQ R8, SI IMULQ R8, SI
ADDQ SI, AX ADDQ SI, AX
RORQ $0x0b, AX RORQ $0x0b, AX
SUBQ BP, AX SUBQ BP, AX
ADDQ $0x00000020, CX ADDQ $0x00000020, CX
SUBQ $0x00000020, DX SUBQ $0x00000020, DX
CMPQ DX, $0x00000020 CMPQ DX, $0x00000020
JGE block JGE block
MOVQ DX, R8 MOVQ DX, R8
MOVQ DX, SI MOVQ DX, SI
SHRQ $0x03, SI SHRQ $0x03, SI
CMPQ SI, $0x00000000 CMPQ SI, $0x00000000
JE longCore0 JE longCore0
CMPQ SI, $0x00000001 CMPQ SI, $0x00000001
JE longCore1 JE longCore1
CMPQ SI, $0x00000002 CMPQ SI, $0x00000002
JE longCore2 JE longCore2
CMPQ SI, $0x00000003 CMPQ SI, $0x00000003
JE longCore3 JE longCore3
longCore3: longCore3:
MOVQ (CX), SI MOVQ (CX), SI
MOVQ $0x00000000802910e3, R9 MOVQ $0x00000000802910e3, R9
IMULQ R9, SI IMULQ R9, SI
ADDQ SI, BX ADDQ SI, BX
ROLQ $0x39, BX ROLQ $0x39, BX
XORQ AX, BX XORQ AX, BX
ADDQ $0x00000008, CX ADDQ $0x00000008, CX
SUBQ $0x00000008, DX SUBQ $0x00000008, DX
longCore2: longCore2:
MOVQ (CX), SI MOVQ (CX), SI
MOVQ $0x00000000819b13af, R9 MOVQ $0x00000000819b13af, R9
IMULQ R9, SI IMULQ R9, SI
ADDQ SI, BP ADDQ SI, BP
ROLQ $0x3f, BP ROLQ $0x3f, BP
XORQ DI, BP XORQ DI, BP
ADDQ $0x00000008, CX ADDQ $0x00000008, CX
SUBQ $0x00000008, DX SUBQ $0x00000008, DX
longCore1: longCore1:
MOVQ (CX), SI MOVQ (CX), SI
MOVQ $0x0000000091cb27e5, R9 MOVQ $0x0000000091cb27e5, R9
IMULQ R9, SI IMULQ R9, SI
ADDQ SI, DI ADDQ SI, DI
RORQ $0x2f, DI RORQ $0x2f, DI
ADDQ BX, DI ADDQ BX, DI
ADDQ $0x00000008, CX ADDQ $0x00000008, CX
SUBQ $0x00000008, DX SUBQ $0x00000008, DX
longCore0: longCore0:
RORQ $0x0b, AX RORQ $0x0b, AX
SUBQ BP, AX SUBQ BP, AX
ADDQ $0x00000001, R8 ADDQ $0x00000001, R8
MOVQ $0x9c1b8e1e9628323f, SI MOVQ $0x9c1b8e1e9628323f, SI
IMULQ SI, R8 IMULQ SI, R8
XORQ R8, BX XORQ R8, BX
CMPQ DX, $0x00000000 CMPQ DX, $0x00000000
JE longTail0 JE longTail0
CMPQ DX, $0x00000001 CMPQ DX, $0x00000001
JE longTail1 JE longTail1
CMPQ DX, $0x00000002 CMPQ DX, $0x00000002
JE longTail2 JE longTail2
CMPQ DX, $0x00000003 CMPQ DX, $0x00000003
JE longTail3 JE longTail3
CMPQ DX, $0x00000004 CMPQ DX, $0x00000004
JE longTail4 JE longTail4
CMPQ DX, $0x00000005 CMPQ DX, $0x00000005
JE longTail5 JE longTail5
CMPQ DX, $0x00000006 CMPQ DX, $0x00000006
JE longTail6 JE longTail6
CMPQ DX, $0x00000007 CMPQ DX, $0x00000007
JE longTail7 JE longTail7
longTail7: longTail7:
MOVBQZX 6(CX), SI MOVBQZX 6(CX), SI
ADDQ SI, BP ADDQ SI, BP
longTail6: longTail6:
MOVWQZX 4(CX), SI MOVWQZX 4(CX), SI
ADDQ SI, DI ADDQ SI, DI
MOVLQZX (CX), SI MOVLQZX (CX), SI
ADDQ SI, AX ADDQ SI, AX
JMP longAfter JMP longAfter
longTail5: longTail5:
MOVBQZX 4(CX), SI MOVBQZX 4(CX), SI
ADDQ SI, BP ADDQ SI, BP
longTail4: longTail4:
MOVLQZX (CX), SI MOVLQZX (CX), SI
ADDQ SI, DI ADDQ SI, DI
JMP longAfter JMP longAfter
longTail3: longTail3:
MOVBQZX 2(CX), SI MOVBQZX 2(CX), SI
ADDQ SI, AX ADDQ SI, AX
longTail2: longTail2:
MOVWQZX (CX), SI MOVWQZX (CX), SI
ADDQ SI, BP ADDQ SI, BP
JMP longAfter JMP longAfter
longTail1: longTail1:
MOVBQZX (CX), SI MOVBQZX (CX), SI
ADDQ SI, DI ADDQ SI, DI
longTail0: longTail0:
ROLQ $0x20, AX ROLQ $0x20, AX
XORQ $0x000000ff, AX XORQ $0x000000ff, AX
longAfter: longAfter:
SUBQ DI, BP SUBQ DI, BP
RORQ $0x13, BX RORQ $0x13, BX
SUBQ BX, BP SUBQ BX, BP
RORQ $0x35, BP RORQ $0x35, BP
XORQ BP, AX XORQ BP, AX
SUBQ AX, BX SUBQ AX, BX
ROLQ $0x2b, AX ROLQ $0x2b, AX
ADDQ AX, BX ADDQ AX, BX
RORQ $0x03, BX RORQ $0x03, BX
SUBQ BX, AX SUBQ BX, AX
RORQ $0x2b, DI RORQ $0x2b, DI
SUBQ AX, DI SUBQ AX, DI
ROLQ $0x37, DI ROLQ $0x37, DI
XORQ BX, DI XORQ BX, DI
SUBQ DI, BP SUBQ DI, BP
RORQ $0x07, AX RORQ $0x07, AX
SUBQ DI, AX SUBQ DI, AX
RORQ $0x1f, DI RORQ $0x1f, DI
ADDQ DI, AX ADDQ DI, AX
SUBQ BP, DI SUBQ BP, DI
RORQ $0x27, AX RORQ $0x27, AX
XORQ AX, DI XORQ AX, DI
RORQ $0x11, AX RORQ $0x11, AX
XORQ DI, AX XORQ DI, AX
ADDQ AX, BP ADDQ AX, BP
RORQ $0x09, BP RORQ $0x09, BP
XORQ BP, DI XORQ BP, DI
ROLQ $0x18, DI ROLQ $0x18, DI
XORQ DI, AX XORQ DI, AX
RORQ $0x3b, AX RORQ $0x3b, AX
RORQ $0x01, BX RORQ $0x01, BX
SUBQ BP, BX SUBQ BP, BX
XORQ BP, BX XORQ BP, BX
XORQ AX, DI XORQ AX, DI
XORQ DI, BX XORQ DI, BX
MOVQ BX, ret+32(FP) MOVQ BX, ret+32(FP)
RET RET

View File

@@ -4,16 +4,18 @@
// func Sum(xs []uint64) uint64 // func Sum(xs []uint64) uint64
TEXT ·Sum(SB), NOSPLIT, $0-32 TEXT ·Sum(SB), NOSPLIT, $0-32
MOVQ xs_base(FP), AX MOVQ xs_base(FP), AX
MOVQ xs_len+8(FP), CX MOVQ xs_len+8(FP), CX
XORQ DX, DX XORQ DX, DX
loop: loop:
CMPQ CX, $0x00 CMPQ CX, $0x00
JE done JE done
ADDQ (AX), DX ADDQ (AX), DX
ADDQ $0x08, AX ADDQ $0x08, AX
DECQ CX DECQ CX
JMP loop JMP loop
done: done:
MOVQ DX, ret+24(FP) MOVQ DX, ret+24(FP)
RET RET

View File

@@ -4,6 +4,7 @@ package prnt
import ( import (
"bytes" "bytes"
"fmt" "fmt"
"io"
) )
// Generator provides convenience methods for code generators. In particular it // Generator provides convenience methods for code generators. In particular it
@@ -15,6 +16,11 @@ type Generator struct {
err error err error
} }
// Raw provides direct access to the underlying output stream.
func (g *Generator) Raw() io.Writer {
return &g.buf
}
// Printf prints to the internal buffer. // Printf prints to the internal buffer.
func (g *Generator) Printf(format string, args ...interface{}) { func (g *Generator) Printf(format string, args ...interface{}) {
if g.err != nil { if g.err != nil {

View File

@@ -1,8 +1,10 @@
package printer package printer
import ( import (
"fmt"
"strconv" "strconv"
"strings" "strings"
"text/tabwriter"
"github.com/mmcloughlin/avo/internal/prnt" "github.com/mmcloughlin/avo/internal/prnt"
"github.com/mmcloughlin/avo/ir" "github.com/mmcloughlin/avo/ir"
@@ -81,20 +83,29 @@ func (p *goasm) function(f *ir.Function) {
} }
p.Printf(", %s\n", textsize(f)) p.Printf(", %s\n", textsize(f))
w := p.tabwriter()
for _, node := range f.Nodes { for _, node := range f.Nodes {
switch n := node.(type) { switch n := node.(type) {
case *ir.Instruction: case *ir.Instruction:
leader := []byte{tabwriter.Escape, '\t', tabwriter.Escape}
fmt.Fprint(w, string(leader)+n.Opcode)
if len(n.Operands) > 0 { if len(n.Operands) > 0 {
p.Printf("\t%s\t%s\n", n.Opcode, joinOperands(n.Operands)) fmt.Fprintf(w, "\t%s", joinOperands(n.Operands))
} else {
p.Printf("\t%s\n", n.Opcode)
} }
fmt.Fprint(w, "\n")
case ir.Label: case ir.Label:
p.Printf("%s:\n", n) w.Flush()
w = p.tabwriter()
p.Printf("\n%s:\n", n)
default: default:
panic("unexpected node type") panic("unexpected node type")
} }
} }
w.Flush()
}
func (p *goasm) tabwriter() *tabwriter.Writer {
return tabwriter.NewWriter(p.Raw(), 4, 4, 1, ' ', tabwriter.StripEscape)
} }
func (p *goasm) global(g *ir.Global) { func (p *goasm) global(g *ir.Global) {

View File

@@ -24,10 +24,10 @@ func TestBasic(t *testing.T) {
"", "",
"// func add(x uint64, y uint64) uint64", "// func add(x uint64, y uint64) uint64",
"TEXT ·add(SB), $0-24", "TEXT ·add(SB), $0-24",
"\tMOVQ\tx(FP), AX", "\tMOVQ x(FP), AX",
"\tMOVQ\ty+8(FP), R9", "\tMOVQ y+8(FP), R9",
"\tADDQ\tAX, R9", "\tADDQ AX, R9",
"\tMOVQ\tR9, ret+16(FP)", "\tMOVQ R9, ret+16(FP)",
"\tRET", "\tRET",
"", "",
}) })

View File

@@ -4,9 +4,9 @@
// func Split(x uint64) (q uint64, l uint32, w uint16, b uint8) // func Split(x uint64) (q uint64, l uint32, w uint16, b uint8)
TEXT ·Split(SB), NOSPLIT, $0-23 TEXT ·Split(SB), NOSPLIT, $0-23
MOVQ x(FP), AX MOVQ x(FP), AX
MOVQ AX, q+8(FP) MOVQ AX, q+8(FP)
MOVL AX, l+16(FP) MOVL AX, l+16(FP)
MOVW AX, w+20(FP) MOVW AX, w+20(FP)
MOVB AL, b+22(FP) MOVB AL, b+22(FP)
RET RET