avo/tests/alloc/zeroing/asm.go

//go:build ignore
// +build ignore

package main

import (
	"strconv"

	. "github.com/mmcloughlin/avo/build"
	. "github.com/mmcloughlin/avo/operand"
	. "github.com/mmcloughlin/avo/reg"
)

// The goal of this test is to confirm correct liveness analysis of zeroing mode
// when masking in AVX-512. In merge masking, some of the bits of the output
// register will be preserved, so the register is live coming into the
// instruction. Zeroing mode removes any input dependency.
//
// This synthetic test sets up a situation where we allocate multiple temporary
// registers. Allocation is only feasible if the liveness pass correctly
// identifies that they are not all live at once.

func main() {
	const n = 32

	TEXT("Zeroing", NOSPLIT, "func(out *[8]uint64)")
	Doc("Zeroing computes the sum 1+2+...+" + strconv.Itoa(n) + " in 8 lanes of 512-bit register.")

	out := Load(Param("out"), GP64())

	Comment("Initialize sum.")
	s := ZMM()
	VPXORD(s, s, s)

	// Allocate registers for the terms of the sum. Write garbage to them.
	//
	// The point here is that under merge-masking, or an incorrect handling of
	// zeroing-masking, these registers would be live from this point. And there
	// would be too many of them so register allocation would fail.
	Comment("Initialize summand registers.")
	filler := GP64()
	MOVQ(U64(0x9e77d78aacb8cbcc), filler)

	z := make([]VecVirtual, n)
	for i := 0; i < n; i++ {
		z[i] = ZMM()
		VPBROADCASTQ(filler, z[i])
	}

	// Prepare a mask register set to all ones.
	Comment("Prepare mask register.")
	k := K()
	KXNORW(k, k, k)

	// Prepare an increment register set to 1 in each lane.
	Comment("Prepare constant registers.")
	one := GP64()
	MOVQ(U64(1), one)
	ones := ZMM()
	VPBROADCASTQ(one, ones)

	zero := ZMM()
	VPXORD(zero, zero, zero)

	last := zero
	for i := 0; i < n; i++ {
		Commentf("Summand %d.", i+1)
		VPADDD_Z(last, ones, k, z[i])
		VPADDD(s, z[i], s)
		last = z[i]
	}

	Comment("Write result to output pointer.")
	VMOVDQU64(s, Mem{Base: out})

	RET()

	Generate()
}