examples/dot: initial version

Implements dot product in avo.

Closes #4
This commit is contained in:
Michael McLoughlin
2018-12-30 18:46:36 -08:00
parent 18cdf50d7c
commit edc4d3f00c
5 changed files with 209 additions and 0 deletions

3
examples/dot/README.md Normal file
View File

@@ -0,0 +1,3 @@
# dot
[Dot product](https://en.wikipedia.org/wiki/Dot_product) in `avo`. Ported from the [`dot_product.py` PeachPy example](https://github.com/Maratyszcza/PeachPy/blob/01d15157a973a4ae16b8046313ddab371ea582db/examples/go-generate/dot_product.py).

93
examples/dot/asm.go Normal file
View File

@@ -0,0 +1,93 @@
// +build ignore
package main
import (
. "github.com/mmcloughlin/avo/build"
. "github.com/mmcloughlin/avo/operand"
. "github.com/mmcloughlin/avo/reg"
)
// unroll is the number of vector accumulators allocated by main, and hence
// the number of vector loads and fused multiply-adds emitted per iteration of
// the main block loop.
var unroll = 6
// main emits the assembly for Dot: an unrolled vector multiply-accumulate
// loop over whole blocks, a scalar loop for the leftover elements, and a final
// reduction of all partial sums to a single float32. Generate writes the
// result out.
func main() {
	TEXT("Dot", "func(x, y []float32) float32")
	x := Mem{Base: Load(Param("x").Base(), GP64v())}
	y := Mem{Base: Load(Param("y").Base(), GP64v())}
	// Remaining element count; only the length of x is consulted.
	n := Load(Param("x").Len(), GP64v())

	const (
		elemBytes = 4  // bytes per float32 element
		vecElems  = 8  // float32 elements handled per vector register
		vecBytes  = 32 // byte stride between consecutive vector loads
	)

	// Reserve one vector accumulator per unrolled step.
	sums := make([]VecVirtual, unroll)
	for i := range sums {
		sums[i] = Yv()
	}

	// Clear every accumulator.
	for _, s := range sums {
		VXORPS(s, s, s)
	}

	// Block loop: runs while at least one full block of elements remains.
	perBlock := vecElems * unroll
	blockBytes := elemBytes * perBlock
	LABEL("blockloop")
	CMPQ(n, U32(perBlock))
	JL(LabelRef("tail"))

	// Bring one block of x into registers.
	loaded := make([]VecVirtual, unroll)
	for i := range loaded {
		loaded[i] = Yv()
	}
	for i, r := range loaded {
		VMOVUPS(x.Offset(i*vecBytes), r)
	}

	// Multiply by the matching block of y and fold into the accumulators.
	for i, r := range loaded {
		VFMADD231PS(y.Offset(i*vecBytes), r, sums[i])
	}

	// Step both pointers past the block, shrink the count, and go again.
	ADDQ(U32(blockBytes), x.Base)
	ADDQ(U32(blockBytes), y.Base)
	SUBQ(U32(perBlock), n)
	JMP(LabelRef("blockloop"))

	// Fewer than a full block left: handle the rest one element at a time.
	LABEL("tail")
	rest := Xv()
	VXORPS(rest, rest, rest)

	LABEL("tailloop")
	CMPQ(n, U32(0))
	JE(LabelRef("reduce"))

	scalar := Xv()
	VMOVSS(x, scalar)
	VFMADD231SS(y, scalar, rest)

	ADDQ(U32(elemBytes), x.Base)
	ADDQ(U32(elemBytes), y.Base)
	DECQ(n)
	JMP(LabelRef("tailloop"))

	// Collapse all partial sums into a single value.
	LABEL("reduce")
	total := sums[0]
	for _, s := range sums[1:] {
		VADDPS(total, s, total)
	}

	// Fold the upper 128 bits onto the lower, add the scalar tail, then
	// horizontally add twice so the low lane carries the full sum.
	low := total.AsX()
	high := Xv()
	VEXTRACTF128(U8(1), total, high)
	VADDPS(low, high, low)
	VADDPS(low, rest, low)
	VHADDPS(low, low, low)
	VHADDPS(low, low, low)
	Store(low, ReturnIndex(0))

	RET()

	Generate()
}

58
examples/dot/dot.s Normal file
View File

@@ -0,0 +1,58 @@
// Code generated by command: go run asm.go -out dot.s -stubs stub.go. DO NOT EDIT.
#include "textflag.h"
// func Dot(x []float32, y []float32) float32
//
// Accumulates x[i]*y[i] over x_len elements and stores the float32 sum at
// ret+48(FP). The frame spec $0-52 declares no local stack and 52 bytes of
// arguments plus result. Only x_len is read; y_len is never consulted.
TEXT ·Dot(SB), 0, $0-52
// AX = pointer into x, CX = pointer into y, DX = elements remaining.
MOVQ x_base(FP), AX
MOVQ y_base+24(FP), CX
MOVQ x_len+8(FP), DX
// Zero the six vector accumulators Y0-Y5.
VXORPS Y0, Y0, Y0
VXORPS Y1, Y1, Y1
VXORPS Y2, Y2, Y2
VXORPS Y3, Y3, Y3
VXORPS Y4, Y4, Y4
VXORPS Y5, Y5, Y5
// Block loop: handles 0x30 (48) elements per pass; falls through to the
// scalar tail once fewer than 48 remain.
blockloop:
CMPQ DX, $0x00000030
JL tail
// Load six consecutive 32-byte vectors of x into Y6-Y11.
VMOVUPS (AX), Y6
VMOVUPS 32(AX), Y7
VMOVUPS 64(AX), Y8
VMOVUPS 96(AX), Y9
VMOVUPS 128(AX), Y10
VMOVUPS 160(AX), Y11
// Fused multiply-add with the matching vectors of y: Yk += Y(k+6) * y[...].
VFMADD231PS (CX), Y6, Y0
VFMADD231PS 32(CX), Y7, Y1
VFMADD231PS 64(CX), Y8, Y2
VFMADD231PS 96(CX), Y9, Y3
VFMADD231PS 128(CX), Y10, Y4
VFMADD231PS 160(CX), Y11, Y5
// Advance both pointers by 0xc0 (192) bytes = 48 float32s, and drop the count.
ADDQ $0x000000c0, AX
ADDQ $0x000000c0, CX
SUBQ $0x00000030, DX
JMP blockloop
// Scalar tail: X12 accumulates the products of the leftover elements.
tail:
VXORPS X12, X12, X12
// Process one float32 per pass until the count reaches zero.
tailloop:
CMPQ DX, $0x00000000
JE reduce
VMOVSS (AX), X6
VFMADD231SS (CX), X6, X12
ADDQ $0x00000004, AX
ADDQ $0x00000004, CX
DECQ DX
JMP tailloop
// Reduction: sum Y1-Y5 into Y0.
reduce:
VADDPS Y0, Y1, Y0
VADDPS Y0, Y2, Y0
VADDPS Y0, Y3, Y0
VADDPS Y0, Y4, Y0
VADDPS Y0, Y5, Y0
// Add the upper 128 bits of Y0 onto the lower, then add the scalar tail sum.
VEXTRACTF128 $0x01, Y0, X1
VADDPS X0, X1, X0
VADDPS X0, X12, X0
// Two horizontal adds leave the sum of all four lanes in the low lane.
VHADDPS X0, X0, X0
VHADDPS X0, X0, X0
// Store the low float32 as the return value.
MOVSS X0, ret+48(FP)
RET

50
examples/dot/dot_test.go Normal file
View File

@@ -0,0 +1,50 @@
package dot
import (
"math/rand"
"testing"
)
//go:generate go run asm.go -out dot.s -stubs stub.go
// TestEmpty checks that the dot product of two nil slices is exactly zero.
func TestEmpty(t *testing.T) {
	got := Dot(nil, nil)
	if got == 0.0 {
		return
	}
	t.Fatal("expect dot product of empty vectors to be zero")
}
// TestLengths compares Dot against the reference implementation Expect on
// random vectors of every length from 0 to 999, so that the block loop, the
// scalar tail, and every combination of the two are exercised. Results must
// agree to within a relative error of epsilon.
func TestLengths(t *testing.T) {
	const epsilon = 0.00001
	for n := 0; n < 1000; n++ {
		x, y := RandomVector(n), RandomVector(n)
		got := Dot(x, y)
		expect := Expect(x, y)

		// Relative error is undefined when the reference is zero (0/0 is
		// NaN), which always happens at n == 0. Require an exact match
		// instead of letting the NaN slip through the check below.
		if expect == 0 {
			if got != 0 {
				t.Fatalf("bad result on vector length %d: got %v expect %v", n, got, expect)
			}
			continue
		}

		relerr := got/expect - 1.0
		// Phrased as a negated <= so that a NaN relerr (for example when Dot
		// returns NaN) fails the test; NaN compares false against everything.
		if !(Abs(relerr) <= epsilon) {
			t.Fatalf("bad result on vector length %d: got %v expect %v relative error %f", n, got, expect, relerr)
		}
	}
}
// Expect is the reference dot product: it sums x[i]*y[i] in index order over
// every index of x. y must be at least as long as x.
func Expect(x, y []float32) float32 {
	var sum float32
	for i, xi := range x {
		sum += xi * y[i]
	}
	return sum
}
// RandomVector returns a slice of n float32 values, each drawn from
// rand.Float32 and scaled by 100.
func RandomVector(n int) []float32 {
	v := make([]float32, n)
	for i := range v {
		v[i] = rand.Float32() * 100
	}
	return v
}
// Abs returns the magnitude of x: negative inputs are negated, everything
// else is returned unchanged.
func Abs(x float32) float32 {
	switch {
	case x < 0.0:
		return -x
	default:
		return x
	}
}

5
examples/dot/stub.go Normal file
View File

@@ -0,0 +1,5 @@
// Code generated by command: go run asm.go -out dot.s -stubs stub.go. DO NOT EDIT.
package dot
// Dot returns the dot product of x and y. The body is implemented in
// assembly in dot.s.
// NOTE(review): the assembly takes its element count from len(x) only and
// never reads len(y) — callers presumably must pass slices of equal length.
func Dot(x []float32, y []float32) float32