reg,pass: refactor allocation of aliased registers (#121)

Issue #100 demonstrated that register allocation for aliased registers is
fundamentally broken. The root of the issue is that currently accesses to the
same virtual register with different masks are treated as different registers.
This PR takes a different approach:

* Liveness analysis is masked: we now properly consider which parts of a register are live
* Register allocation produces a mapping from virtual to physical ID, and aliasing is applied later

In addition, a new pass ZeroExtend32BitOutputs accounts for the fact that 32-bit writes in 64-bit mode should actually be treated as 64-bit writes (the result is zero-extended).

Closes #100
This commit is contained in:
Michael McLoughlin
2020-01-22 22:50:40 -08:00
committed by GitHub
parent 126469f13d
commit f40d602170
33 changed files with 1241 additions and 362 deletions

View File

@@ -21,8 +21,17 @@ func main() {
// Allocate registers and initialize.
x := make([]Register, n)
for i := 0; i < n; i++ {
x[i] = GP8()
i := 0
// Low byte registers.
for ; i < 15; i++ {
x[i] = GP8L()
MOVB(U8(i+1), x[i])
}
// High byte registers.
for ; i < n; i++ {
x[i] = GP8H()
MOVB(U8(i+1), x[i])
}

View File

@@ -8,28 +8,24 @@ TEXT ·GP8(SB), NOSPLIT, $0-1
MOVB $0x02, CL
MOVB $0x03, DL
MOVB $0x04, BL
MOVB $0x05, AH
MOVB $0x06, CH
MOVB $0x07, DH
MOVB $0x08, BH
MOVB $0x09, BP
MOVB $0x0a, SI
MOVB $0x0b, DI
MOVB $0x0c, R8
MOVB $0x0d, R9
MOVB $0x0e, R10
MOVB $0x0f, R11
MOVB $0x10, R12
MOVB $0x11, R13
MOVB $0x12, R14
MOVB $0x13, R15
MOVB $0x05, BP
MOVB $0x06, SI
MOVB $0x07, DI
MOVB $0x08, R8
MOVB $0x09, R9
MOVB $0x0a, R10
MOVB $0x0b, R11
MOVB $0x0c, R12
MOVB $0x0d, R13
MOVB $0x0e, R14
MOVB $0x0f, R15
MOVB $0x10, AH
MOVB $0x11, CH
MOVB $0x12, DH
MOVB $0x13, BH
ADDB CL, AL
ADDB DL, AL
ADDB BL, AL
ADDB AH, AL
ADDB CH, AL
ADDB DH, AL
ADDB BH, AL
ADDB BP, AL
ADDB SI, AL
ADDB DI, AL
@@ -41,5 +37,9 @@ TEXT ·GP8(SB), NOSPLIT, $0-1
ADDB R13, AL
ADDB R14, AL
ADDB R15, AL
ADDB AH, AL
ADDB CH, AL
ADDB DH, AL
ADDB BH, AL
MOVB AL, ret+0(FP)
RET

66
tests/alloc/masks/asm.go Normal file
View File

@@ -0,0 +1,66 @@
// +build ignore
package main
import (
"strconv"
. "github.com/mmcloughlin/avo/build"
. "github.com/mmcloughlin/avo/operand"
. "github.com/mmcloughlin/avo/reg"
)
// The goal of this test is to create a synthetic scenario in which register
// allocation would fail if register liveness and allocation passes didn't take
// masks into account.
//
// The idea is to create a set of 15 64-bit virtual registers (15 being total
// number of allocatable 64-bit general purpose registers). For each one: write
// to the whole register and then later write to only the low 16 bits, and
// finally consume the whole 64-bit register. This means there is an interval in
// which only the high 48 bits are live. During this interval we should be able
// to allocate and use a set of 15 16-bit virtual registers.
func main() {
	const n = 15

	TEXT("Masks", NOSPLIT, "func() (uint16, uint64)")
	Doc("Masks computes the sum 1+2+...+" + strconv.Itoa(n) + " in two ways.")

	// Step 1: Create n 64-bit registers. Each one carries its 1-based index in
	// bits 16 and up, plus a placeholder 42 in the low 16 bits that step 4
	// overwrites with zero.
	wide := make([]GPVirtual, n)
	for i := range wide {
		wide[i] = GP64()
		MOVQ(U32(((i+1)<<16)|42), wide[i])
	}

	// Step 2: Create n 16-bit registers, each holding its 1-based index.
	narrow := make([]Register, n)
	for i := range narrow {
		narrow[i] = GP16()
		MOVW(U16(i+1), narrow[i])
	}

	// Step 3: Accumulate the 16-bit registers into the first one and store it
	// as the first return value.
	for _, r := range narrow[1:] {
		ADDW(r, narrow[0])
	}
	Store(narrow[0], ReturnIndex(0))

	// Step 4: Zero the low 16 bits of every 64-bit register.
	for _, r := range wide {
		MOVW(U16(0), r.As16())
	}

	// Step 5: Accumulate the 64-bit registers into the first one, shift the sum
	// down past the cleared low 16 bits, and store it as the second return
	// value.
	for _, r := range wide[1:] {
		ADDQ(r, wide[0])
	}
	SHRQ(U8(16), wide[0])
	Store(wide[0], ReturnIndex(1))

	RET()
	Generate()
}

2
tests/alloc/masks/doc.go Normal file
View File

@@ -0,0 +1,2 @@
// Package masks tests that register liveness and allocation passes handle masks correctly.
package masks

83
tests/alloc/masks/masks.s Normal file
View File

@@ -0,0 +1,83 @@
// Code generated by command: go run asm.go -out masks.s -stubs stub.go. DO NOT EDIT.
#include "textflag.h"
// func Masks() (uint16, uint64)
TEXT ·Masks(SB), NOSPLIT, $0-16
MOVQ $0x0001002a, AX
MOVQ $0x0002002a, CX
MOVQ $0x0003002a, DX
MOVQ $0x0004002a, BX
MOVQ $0x0005002a, BP
MOVQ $0x0006002a, SI
MOVQ $0x0007002a, DI
MOVQ $0x0008002a, R8
MOVQ $0x0009002a, R9
MOVQ $0x000a002a, R10
MOVQ $0x000b002a, R11
MOVQ $0x000c002a, R12
MOVQ $0x000d002a, R13
MOVQ $0x000e002a, R14
MOVQ $0x000f002a, R15
MOVW $0x0001, AX
MOVW $0x0002, CX
MOVW $0x0003, DX
MOVW $0x0004, BX
MOVW $0x0005, BP
MOVW $0x0006, SI
MOVW $0x0007, DI
MOVW $0x0008, R8
MOVW $0x0009, R9
MOVW $0x000a, R10
MOVW $0x000b, R11
MOVW $0x000c, R12
MOVW $0x000d, R13
MOVW $0x000e, R14
MOVW $0x000f, R15
ADDW CX, AX
ADDW DX, AX
ADDW BX, AX
ADDW BP, AX
ADDW SI, AX
ADDW DI, AX
ADDW R8, AX
ADDW R9, AX
ADDW R10, AX
ADDW R11, AX
ADDW R12, AX
ADDW R13, AX
ADDW R14, AX
ADDW R15, AX
MOVW AX, ret+0(FP)
MOVW $0x0000, AX
MOVW $0x0000, CX
MOVW $0x0000, DX
MOVW $0x0000, BX
MOVW $0x0000, BP
MOVW $0x0000, SI
MOVW $0x0000, DI
MOVW $0x0000, R8
MOVW $0x0000, R9
MOVW $0x0000, R10
MOVW $0x0000, R11
MOVW $0x0000, R12
MOVW $0x0000, R13
MOVW $0x0000, R14
MOVW $0x0000, R15
ADDQ CX, AX
ADDQ DX, AX
ADDQ BX, AX
ADDQ BP, AX
ADDQ SI, AX
ADDQ DI, AX
ADDQ R8, AX
ADDQ R9, AX
ADDQ R10, AX
ADDQ R11, AX
ADDQ R12, AX
ADDQ R13, AX
ADDQ R14, AX
ADDQ R15, AX
SHRQ $0x10, AX
MOVQ AX, ret1+8(FP)
RET

View File

@@ -0,0 +1,15 @@
package masks
import (
"testing"
)
//go:generate go run asm.go -out masks.s -stubs stub.go
// TestMasks checks that both results of Masks equal the sum 1+2+...+15.
func TestMasks(t *testing.T) {
	const (
		terms = 15
		want  = terms * (terms + 1) / 2
	)

	sum16, sum64 := Masks()
	if sum16 == want && sum64 == want {
		return
	}
	t.Fatalf("Masks() = %v, %v; expect %v, %v", sum16, sum64, want, want)
}

View File

@@ -0,0 +1,6 @@
// Code generated by command: go run asm.go -out masks.s -stubs stub.go. DO NOT EDIT.
package masks
// Masks computes the sum 1+2+...+15 in two ways.
func Masks() (uint16, uint64)

View File

@@ -0,0 +1,66 @@
// +build ignore
package main
import (
"strconv"
. "github.com/mmcloughlin/avo/build"
. "github.com/mmcloughlin/avo/operand"
. "github.com/mmcloughlin/avo/reg"
)
// The goal is to test for correct handling of 32-bit operands in 64-bit mode,
// specifically that writes are zero-extended to 64 bits. This test is
// constructed such that the register allocator would fail if this feature is
// not accounted for. It consists of multiple copies of a 32-bit write followed
// by a 64-bit read of the same register. Without special treatment liveness
// analysis would consider the upper 32 bits to still be live prior to the
// write. Therefore if we stack up enough copies of this, we could cause the
// register allocator to fail.
func main() {
	const (
		regs  = 14           // registers written and summed per iteration
		iters = 3            // number of iterations
		total = regs * iters // total number of virtual registers
	)

	TEXT("Upper32", NOSPLIT, "func() uint64")
	Doc("Upper32 computes the sum 1+2+...+" + strconv.Itoa(total) + ".")

	Comment("Initialize sum.")
	s := GP64()
	XORQ(s, s)

	// Fill every 64-bit register with the same 64-bit constant, so the upper
	// 32 bits are non-zero before the 32-bit writes below.
	Comment("Initialize registers.")
	x := make([]GPVirtual, total)
	for i := range x {
		x[i] = GP64()
		MOVQ(U64(0x9e77d78aacb8cbcc), x[i])
	}

	for i := 0; i < iters; i++ {
		Commentf("Iteration %d.", i+1)

		// This iteration works on the next window of regs registers.
		base := i * regs
		window := x[base : base+regs]

		// Write consecutive values through the 32-bit aliases.
		for j, reg := range window {
			MOVL(U32(base+j+1), reg.As32())
		}

		// Read each register back at full 64-bit width and add it to the sum.
		for _, reg := range window {
			ADDQ(reg, s)
		}
	}

	Comment("Store result and return.")
	Store(s, ReturnIndex(0))
	RET()
	Generate()
}

View File

@@ -0,0 +1,2 @@
// Package upper32 tests liveness analysis of 32-bit operations on 64-bit registers.
package upper32

View File

@@ -0,0 +1,6 @@
// Code generated by command: go run asm.go -out upper32.s -stubs stub.go. DO NOT EDIT.
package upper32
// Upper32 computes the sum 1+2+...+42.
func Upper32() uint64

View File

@@ -0,0 +1,146 @@
// Code generated by command: go run asm.go -out upper32.s -stubs stub.go. DO NOT EDIT.
#include "textflag.h"
// func Upper32() uint64
TEXT ·Upper32(SB), NOSPLIT, $0-8
// Initialize sum.
XORQ AX, AX
// Initialize registers.
MOVQ $0x9e77d78aacb8cbcc, CX
MOVQ $0x9e77d78aacb8cbcc, DX
MOVQ $0x9e77d78aacb8cbcc, BX
MOVQ $0x9e77d78aacb8cbcc, BP
MOVQ $0x9e77d78aacb8cbcc, SI
MOVQ $0x9e77d78aacb8cbcc, DI
MOVQ $0x9e77d78aacb8cbcc, R8
MOVQ $0x9e77d78aacb8cbcc, R9
MOVQ $0x9e77d78aacb8cbcc, R10
MOVQ $0x9e77d78aacb8cbcc, R11
MOVQ $0x9e77d78aacb8cbcc, R12
MOVQ $0x9e77d78aacb8cbcc, R13
MOVQ $0x9e77d78aacb8cbcc, R14
MOVQ $0x9e77d78aacb8cbcc, R15
MOVQ $0x9e77d78aacb8cbcc, CX
MOVQ $0x9e77d78aacb8cbcc, DX
MOVQ $0x9e77d78aacb8cbcc, BX
MOVQ $0x9e77d78aacb8cbcc, BP
MOVQ $0x9e77d78aacb8cbcc, SI
MOVQ $0x9e77d78aacb8cbcc, DI
MOVQ $0x9e77d78aacb8cbcc, R8
MOVQ $0x9e77d78aacb8cbcc, R9
MOVQ $0x9e77d78aacb8cbcc, R10
MOVQ $0x9e77d78aacb8cbcc, R11
MOVQ $0x9e77d78aacb8cbcc, R12
MOVQ $0x9e77d78aacb8cbcc, R13
MOVQ $0x9e77d78aacb8cbcc, R14
MOVQ $0x9e77d78aacb8cbcc, R15
MOVQ $0x9e77d78aacb8cbcc, CX
MOVQ $0x9e77d78aacb8cbcc, DX
MOVQ $0x9e77d78aacb8cbcc, BX
MOVQ $0x9e77d78aacb8cbcc, BP
MOVQ $0x9e77d78aacb8cbcc, SI
MOVQ $0x9e77d78aacb8cbcc, DI
MOVQ $0x9e77d78aacb8cbcc, R8
MOVQ $0x9e77d78aacb8cbcc, R9
MOVQ $0x9e77d78aacb8cbcc, R10
MOVQ $0x9e77d78aacb8cbcc, R11
MOVQ $0x9e77d78aacb8cbcc, R12
MOVQ $0x9e77d78aacb8cbcc, R13
MOVQ $0x9e77d78aacb8cbcc, R14
MOVQ $0x9e77d78aacb8cbcc, R15
// Iteration 1.
MOVL $0x00000001, CX
MOVL $0x00000002, DX
MOVL $0x00000003, BX
MOVL $0x00000004, BP
MOVL $0x00000005, SI
MOVL $0x00000006, DI
MOVL $0x00000007, R8
MOVL $0x00000008, R9
MOVL $0x00000009, R10
MOVL $0x0000000a, R11
MOVL $0x0000000b, R12
MOVL $0x0000000c, R13
MOVL $0x0000000d, R14
MOVL $0x0000000e, R15
ADDQ CX, AX
ADDQ DX, AX
ADDQ BX, AX
ADDQ BP, AX
ADDQ SI, AX
ADDQ DI, AX
ADDQ R8, AX
ADDQ R9, AX
ADDQ R10, AX
ADDQ R11, AX
ADDQ R12, AX
ADDQ R13, AX
ADDQ R14, AX
ADDQ R15, AX
// Iteration 2.
MOVL $0x0000000f, CX
MOVL $0x00000010, DX
MOVL $0x00000011, BX
MOVL $0x00000012, BP
MOVL $0x00000013, SI
MOVL $0x00000014, DI
MOVL $0x00000015, R8
MOVL $0x00000016, R9
MOVL $0x00000017, R10
MOVL $0x00000018, R11
MOVL $0x00000019, R12
MOVL $0x0000001a, R13
MOVL $0x0000001b, R14
MOVL $0x0000001c, R15
ADDQ CX, AX
ADDQ DX, AX
ADDQ BX, AX
ADDQ BP, AX
ADDQ SI, AX
ADDQ DI, AX
ADDQ R8, AX
ADDQ R9, AX
ADDQ R10, AX
ADDQ R11, AX
ADDQ R12, AX
ADDQ R13, AX
ADDQ R14, AX
ADDQ R15, AX
// Iteration 3.
MOVL $0x0000001d, CX
MOVL $0x0000001e, DX
MOVL $0x0000001f, BX
MOVL $0x00000020, BP
MOVL $0x00000021, SI
MOVL $0x00000022, DI
MOVL $0x00000023, R8
MOVL $0x00000024, R9
MOVL $0x00000025, R10
MOVL $0x00000026, R11
MOVL $0x00000027, R12
MOVL $0x00000028, R13
MOVL $0x00000029, R14
MOVL $0x0000002a, R15
ADDQ CX, AX
ADDQ DX, AX
ADDQ BX, AX
ADDQ BP, AX
ADDQ SI, AX
ADDQ DI, AX
ADDQ R8, AX
ADDQ R9, AX
ADDQ R10, AX
ADDQ R11, AX
ADDQ R12, AX
ADDQ R13, AX
ADDQ R14, AX
ADDQ R15, AX
// Store result and return.
MOVQ AX, ret+0(FP)
RET

View File

@@ -0,0 +1,15 @@
package upper32
import (
"testing"
)
//go:generate go run asm.go -out upper32.s -stubs stub.go
// TestUpper32 checks that Upper32 returns the sum 1+2+...+42 (14 registers
// times 3 iterations).
func TestUpper32(t *testing.T) {
	const (
		terms = 14 * 3
		want  = terms * (terms + 1) / 2
	)

	got := Upper32()
	if got == want {
		return
	}
	t.Fatalf("Upper32() = %v; expect %v", got, want)
}

View File

@@ -0,0 +1,22 @@
// +build ignore
package main
import (
. "github.com/mmcloughlin/avo/build"
. "github.com/mmcloughlin/avo/operand"
)
func main() {
	// Number of terms in the generated sum 1+2+...+terms.
	const terms = 100

	TEXT("Issue100", NOSPLIT, "func() uint64")

	// Running total, zeroed before any term is added.
	sum := GP64()
	XORQ(sum, sum)

	// Each term gets its own fresh virtual register, which is then read back
	// through an explicit 64-bit cast when added to the total.
	for v := 1; v <= terms; v++ {
		term := GP64()
		MOVQ(U32(v), term)
		ADDQ(term.As64(), sum)
	}

	Store(sum, ReturnIndex(0))
	RET()
	Generate()
}

View File

@@ -0,0 +1,2 @@
// Package issue100 contains a reproducer for a bug in aliased register allocation.
package issue100

View File

@@ -0,0 +1,209 @@
// Code generated by command: go run asm.go -out issue100.s -stubs stub.go. DO NOT EDIT.
#include "textflag.h"
// func Issue100() uint64
TEXT ·Issue100(SB), NOSPLIT, $0-8
XORQ AX, AX
MOVQ $0x00000001, CX
ADDQ CX, AX
MOVQ $0x00000002, CX
ADDQ CX, AX
MOVQ $0x00000003, CX
ADDQ CX, AX
MOVQ $0x00000004, CX
ADDQ CX, AX
MOVQ $0x00000005, CX
ADDQ CX, AX
MOVQ $0x00000006, CX
ADDQ CX, AX
MOVQ $0x00000007, CX
ADDQ CX, AX
MOVQ $0x00000008, CX
ADDQ CX, AX
MOVQ $0x00000009, CX
ADDQ CX, AX
MOVQ $0x0000000a, CX
ADDQ CX, AX
MOVQ $0x0000000b, CX
ADDQ CX, AX
MOVQ $0x0000000c, CX
ADDQ CX, AX
MOVQ $0x0000000d, CX
ADDQ CX, AX
MOVQ $0x0000000e, CX
ADDQ CX, AX
MOVQ $0x0000000f, CX
ADDQ CX, AX
MOVQ $0x00000010, CX
ADDQ CX, AX
MOVQ $0x00000011, CX
ADDQ CX, AX
MOVQ $0x00000012, CX
ADDQ CX, AX
MOVQ $0x00000013, CX
ADDQ CX, AX
MOVQ $0x00000014, CX
ADDQ CX, AX
MOVQ $0x00000015, CX
ADDQ CX, AX
MOVQ $0x00000016, CX
ADDQ CX, AX
MOVQ $0x00000017, CX
ADDQ CX, AX
MOVQ $0x00000018, CX
ADDQ CX, AX
MOVQ $0x00000019, CX
ADDQ CX, AX
MOVQ $0x0000001a, CX
ADDQ CX, AX
MOVQ $0x0000001b, CX
ADDQ CX, AX
MOVQ $0x0000001c, CX
ADDQ CX, AX
MOVQ $0x0000001d, CX
ADDQ CX, AX
MOVQ $0x0000001e, CX
ADDQ CX, AX
MOVQ $0x0000001f, CX
ADDQ CX, AX
MOVQ $0x00000020, CX
ADDQ CX, AX
MOVQ $0x00000021, CX
ADDQ CX, AX
MOVQ $0x00000022, CX
ADDQ CX, AX
MOVQ $0x00000023, CX
ADDQ CX, AX
MOVQ $0x00000024, CX
ADDQ CX, AX
MOVQ $0x00000025, CX
ADDQ CX, AX
MOVQ $0x00000026, CX
ADDQ CX, AX
MOVQ $0x00000027, CX
ADDQ CX, AX
MOVQ $0x00000028, CX
ADDQ CX, AX
MOVQ $0x00000029, CX
ADDQ CX, AX
MOVQ $0x0000002a, CX
ADDQ CX, AX
MOVQ $0x0000002b, CX
ADDQ CX, AX
MOVQ $0x0000002c, CX
ADDQ CX, AX
MOVQ $0x0000002d, CX
ADDQ CX, AX
MOVQ $0x0000002e, CX
ADDQ CX, AX
MOVQ $0x0000002f, CX
ADDQ CX, AX
MOVQ $0x00000030, CX
ADDQ CX, AX
MOVQ $0x00000031, CX
ADDQ CX, AX
MOVQ $0x00000032, CX
ADDQ CX, AX
MOVQ $0x00000033, CX
ADDQ CX, AX
MOVQ $0x00000034, CX
ADDQ CX, AX
MOVQ $0x00000035, CX
ADDQ CX, AX
MOVQ $0x00000036, CX
ADDQ CX, AX
MOVQ $0x00000037, CX
ADDQ CX, AX
MOVQ $0x00000038, CX
ADDQ CX, AX
MOVQ $0x00000039, CX
ADDQ CX, AX
MOVQ $0x0000003a, CX
ADDQ CX, AX
MOVQ $0x0000003b, CX
ADDQ CX, AX
MOVQ $0x0000003c, CX
ADDQ CX, AX
MOVQ $0x0000003d, CX
ADDQ CX, AX
MOVQ $0x0000003e, CX
ADDQ CX, AX
MOVQ $0x0000003f, CX
ADDQ CX, AX
MOVQ $0x00000040, CX
ADDQ CX, AX
MOVQ $0x00000041, CX
ADDQ CX, AX
MOVQ $0x00000042, CX
ADDQ CX, AX
MOVQ $0x00000043, CX
ADDQ CX, AX
MOVQ $0x00000044, CX
ADDQ CX, AX
MOVQ $0x00000045, CX
ADDQ CX, AX
MOVQ $0x00000046, CX
ADDQ CX, AX
MOVQ $0x00000047, CX
ADDQ CX, AX
MOVQ $0x00000048, CX
ADDQ CX, AX
MOVQ $0x00000049, CX
ADDQ CX, AX
MOVQ $0x0000004a, CX
ADDQ CX, AX
MOVQ $0x0000004b, CX
ADDQ CX, AX
MOVQ $0x0000004c, CX
ADDQ CX, AX
MOVQ $0x0000004d, CX
ADDQ CX, AX
MOVQ $0x0000004e, CX
ADDQ CX, AX
MOVQ $0x0000004f, CX
ADDQ CX, AX
MOVQ $0x00000050, CX
ADDQ CX, AX
MOVQ $0x00000051, CX
ADDQ CX, AX
MOVQ $0x00000052, CX
ADDQ CX, AX
MOVQ $0x00000053, CX
ADDQ CX, AX
MOVQ $0x00000054, CX
ADDQ CX, AX
MOVQ $0x00000055, CX
ADDQ CX, AX
MOVQ $0x00000056, CX
ADDQ CX, AX
MOVQ $0x00000057, CX
ADDQ CX, AX
MOVQ $0x00000058, CX
ADDQ CX, AX
MOVQ $0x00000059, CX
ADDQ CX, AX
MOVQ $0x0000005a, CX
ADDQ CX, AX
MOVQ $0x0000005b, CX
ADDQ CX, AX
MOVQ $0x0000005c, CX
ADDQ CX, AX
MOVQ $0x0000005d, CX
ADDQ CX, AX
MOVQ $0x0000005e, CX
ADDQ CX, AX
MOVQ $0x0000005f, CX
ADDQ CX, AX
MOVQ $0x00000060, CX
ADDQ CX, AX
MOVQ $0x00000061, CX
ADDQ CX, AX
MOVQ $0x00000062, CX
ADDQ CX, AX
MOVQ $0x00000063, CX
ADDQ CX, AX
MOVQ $0x00000064, CX
ADDQ CX, AX
MOVQ AX, ret+0(FP)
RET

View File

@@ -0,0 +1,15 @@
package issue100
import (
"testing"
)
//go:generate go run asm.go -out issue100.s -stubs stub.go
// TestIssue100 checks that Issue100 returns the sum 1+2+...+100.
func TestIssue100(t *testing.T) {
	const (
		terms uint64 = 100
		want         = terms * (terms + 1) / 2
	)

	got := Issue100()
	if got == want {
		return
	}
	t.Fatalf("Issue100() = %v; expect %v", got, want)
}

View File

@@ -0,0 +1,5 @@
// Code generated by command: go run asm.go -out issue100.s -stubs stub.go. DO NOT EDIT.
package issue100
func Issue100() uint64

View File

@@ -0,0 +1,23 @@
// +build generate
//go:generate go run $GOFILE
// Regression test for a bug where casting a physical register would give the
// error "non physical register found".
//
// See: https://github.com/mmcloughlin/avo/issues/65#issuecomment-576850145
package main
import (
. "github.com/mmcloughlin/avo/build"
. "github.com/mmcloughlin/avo/operand"
. "github.com/mmcloughlin/avo/reg"
)
func main() {
	TEXT("Issue65", NOSPLIT, "func()")

	// Casting the physical register Y0 to its X alias is the operation that
	// used to trigger the "non physical register found" error.
	src := Y0.AsX()
	VINSERTI128(Imm(1), src, Y1, Y2)

	RET()
	Generate()
}