diff --git a/internal/gen/loadertest.go b/internal/gen/loadertest.go
index cedfb20..8e1dbed 100644
--- a/internal/gen/loadertest.go
+++ b/internal/gen/loadertest.go
@@ -16,10 +16,13 @@ func (l LoaderTest) Generate(w io.Writer, is []*inst.Instruction) error {
 
 	p.printf("TEXT loadertest(SB), 0, $0\n")
 
+	counts := map[string]int{}
+
 	for _, i := range is {
 		p.printf("\t// %s %s\n", i.Opcode, i.Summary)
 		if skip, msg := l.skip(i.Opcode); skip {
 			p.printf("\t// SKIP: %s\n", msg)
+			counts["skip"]++
 			continue
 		}
 
@@ -28,15 +31,21 @@ func (l LoaderTest) Generate(w io.Writer, is []*inst.Instruction) error {
 			p.printf("\t// %#v\n", f.Operands)
 			if as == nil {
 				p.printf("\t// TODO\n")
+				counts["todo"]++
 				continue
 			}
 			p.printf("\t%s\t%s\n", i.Opcode, strings.Join(as, ", "))
+			counts["total"]++
 		}
 		p.printf("\n")
 	}
 
 	p.printf("\tRET\n")
 
+	for _, m := range []string{"skip", "todo", "total"} { // fixed key order keeps the generated output deterministic
+		p.printf("// %s: %d\n", m, counts[m])
+	}
+
 	return p.Err()
 }
 
@@ -68,29 +77,30 @@ func args(ops []inst.Operand) []string {
 // arg generates an argument for an operand of the given type.
 func arg(t string) string {
 	m := map[string]string{
-		// <xs:enumeration value="1" />
-		// <xs:enumeration value="3" />
+		"1":     "$1", // <xs:enumeration value="1" />
+		"3":     "$3", // <xs:enumeration value="3" />
 		"imm2u": "$3",
 		// <xs:enumeration value="imm4" />
 		"imm8":  fmt.Sprintf("$%d", math.MaxInt8),  // <xs:enumeration value="imm8" />
 		"imm16": fmt.Sprintf("$%d", math.MaxInt16), // <xs:enumeration value="imm16" />
 		"imm32": fmt.Sprintf("$%d", math.MaxInt32), // <xs:enumeration value="imm32" />
 		"imm64": fmt.Sprintf("$%d", math.MaxInt64), // <xs:enumeration value="imm64" />
-		// <xs:enumeration value="al" />
-		// <xs:enumeration value="cl" />
+		"al":    "AL",                              // <xs:enumeration value="al" />
+		"cl":    "CL",                              // <xs:enumeration value="cl" />
+
 		// <xs:enumeration value="r8" />
 		// <xs:enumeration value="r8l" />
 		// <xs:enumeration value="ax" />
 		// <xs:enumeration value="r16" />
 		// <xs:enumeration value="r16l" />
-		// <xs:enumeration value="eax" />
+		"eax": "AX", // <xs:enumeration value="eax" />
 		// <xs:enumeration value="r32" />
 		// <xs:enumeration value="r32l" />
-		// <xs:enumeration value="rax" />
+		"rax": "AX",  // <xs:enumeration value="rax" />
 		"r64": "R15", // <xs:enumeration value="r64" />
 		// <xs:enumeration value="mm" />
-		// <xs:enumeration value="xmm0" />
-		"xmm": "X7", // <xs:enumeration value="xmm" />
+		"xmm0": "X0", // <xs:enumeration value="xmm0" />
+		"xmm":  "X7", // <xs:enumeration value="xmm" />
 		// <xs:enumeration value="xmm{k}" />
 		// <xs:enumeration value="xmm{k}{z}" />
 		// <xs:enumeration value="ymm" />
@@ -104,16 +114,16 @@ func arg(t string) string {
 		// <xs:enumeration value="moffs32" />
 		// <xs:enumeration value="moffs64" />
 		// <xs:enumeration value="m" />
-		// <xs:enumeration value="m8" />
-		// <xs:enumeration value="m16" />
+		"m8":  "8(AX)(CX*2)",  // <xs:enumeration value="m8" />
+		"m16": "16(AX)(CX*2)", // <xs:enumeration value="m16" />
 		// <xs:enumeration value="m16{k}{z}" />
-		// <xs:enumeration value="m32" />
+		"m32": "32(AX)(CX*2)", // <xs:enumeration value="m32" />
 		// <xs:enumeration value="m32{k}" />
 		// <xs:enumeration value="m32{k}{z}" />
-		// <xs:enumeration value="m64" />
+		"m64": "64(AX)(CX*2)", // <xs:enumeration value="m64" />
 		// <xs:enumeration value="m64{k}" />
 		// <xs:enumeration value="m64{k}{z}" />
-		// <xs:enumeration value="m128" />
+		"m128": "128(AX)(CX*2)", // <xs:enumeration value="m128" />
 		// <xs:enumeration value="m128{k}{z}" />
 		// <xs:enumeration value="m256" />
 		// <xs:enumeration value="m256{k}{z}" />
diff --git a/internal/load/load.go b/internal/load/load.go
index cd15aa4..207e374 100644
--- a/internal/load/load.go
+++ b/internal/load/load.go
@@ -19,8 +19,8 @@ type Loader struct {
 	X86CSVPath     string
 	OpcodesXMLPath string
 
-	alias          map[opcodescsv.Alias]string
-	usesIntelOrder map[string]bool
+	alias map[opcodescsv.Alias]string
+	order map[string]opcodescsv.OperandOrder
 }
 
 func NewLoaderFromDataDir(dir string) *Loader {
@@ -81,7 +81,11 @@ func (l *Loader) init() error {
 		return err
 	}
 
-	l.usesIntelOrder = opcodescsv.BuildIntelOrderSet(icsv)
+	// for a, op := range l.alias {
+	// 	log.Printf("alias %#v -> %s", a, op)
+	// }
+
+	l.order = opcodescsv.BuildOrderMap(icsv)
 
 	return nil
 }
@@ -89,14 +93,14 @@ func (l *Loader) init() error {
 // include decides whether to include the instruction form in the avo listing.
 // This discards some opcodes that are not supported in Go.
 func (l Loader) include(f opcodesxml.Form) bool {
-	// Exclude certain ISAs simply not present in Go (AMD-only is a common reason).
+	// Exclude certain ISAs simply not present in Go.
 	for _, isa := range f.ISA {
 		switch isa.ID {
+		// Most of these are AMD-only.
 		case "TBM", "CLZERO", "MONITORX", "FEMMS", "FMA4", "XOP", "SSE4A":
 			return false
-		}
-		// TODO(mbm): support AVX512
-		if strings.HasPrefix(isa.ID, "AVX512") {
+		// Incomplete support for some prefetching instructions.
+		case "PREFETCH", "PREFETCHW", "PREFETCHWT1", "CLWB":
 			return false
 		}
 	}
@@ -119,12 +123,19 @@ func (l Loader) include(f opcodesxml.Form) bool {
 }
 
 func (l Loader) lookupAlias(f opcodesxml.Form) string {
-	a := opcodescsv.Alias{
+	// First try the key that carries the form's data size.
+	k := opcodescsv.Alias{
 		Opcode:      f.GASName,
 		DataSize:    datasize(f),
 		NumOperands: len(f.Operands),
 	}
-	return l.alias[a]
+	if a := l.alias[k]; a != "" {
+		return a
+	}
+
+	// Fall back to the unknown data size (0); the result is "" if that key is missing too.
+	k.DataSize = 0
+	return l.alias[k]
 }
 
 func (l Loader) gonames(f opcodesxml.Form) []string {
@@ -152,7 +163,9 @@ func (l Loader) gonames(f opcodesxml.Form) []string {
 	s := datasize(f)
 	suffix := map[int]string{16: "W", 32: "L", 64: "Q", 128: "X", 256: "Y"}
 	switch n {
-	case "RDRAND", "RDSEED":
+	case "VCVTUSI2SS", "VCVTSD2USI", "VCVTSS2USI", "VCVTUSI2SD", "VCVTTSS2USI", "VCVTTSD2USI":
+		fallthrough
+	case "RDRAND", "RDSEED", "MOVBEQ":
 		n += suffix[s]
 	}
 
@@ -162,7 +175,16 @@ func (l Loader) gonames(f opcodesxml.Form) []string {
 func (l Loader) form(opcode string, f opcodesxml.Form) inst.Form {
 	// Map operands to avo format and ensure correct order.
 	ops := operands(f.Operands)
-	if !l.usesIntelOrder[opcode] {
+
+	switch l.order[opcode] {
+	case opcodescsv.IntelOrder:
+		// Nothing to do.
+	case opcodescsv.CMP3Order:
+		ops[0], ops[1] = ops[1], ops[0]
+	case opcodescsv.UnknownOrder:
+		// Instructions not in the x86 CSV are assumed to have reverse Intel order.
+		fallthrough
+	case opcodescsv.ReverseIntelOrder:
 		for l, r := 0, len(ops)-1; l < r; l, r = l+1, r-1 {
 			ops[l], ops[r] = ops[r], ops[l]
 		}
@@ -171,7 +193,23 @@ func (l Loader) form(opcode string, f opcodesxml.Form) inst.Form {
 	// Handle some exceptions.
 	// TODO(mbm): consider if there's some nicer way to handle the list of special cases.
 	switch opcode {
-	case "SHA1RNDS4":
+	// Go assembler has an internal Yu2 operand type for unsigned 2-bit immediates.
+	//
+	// Reference: https://github.com/golang/go/blob/6d5caf38e37bf9aeba3291f1f0b0081f934b1187/src/cmd/internal/obj/x86/asm6.go#L109
+	//
+	//		Yu2 // $x, x fits in uint2
+	//
+	// Reference: https://github.com/golang/go/blob/6d5caf38e37bf9aeba3291f1f0b0081f934b1187/src/cmd/internal/obj/x86/asm6.go#L858-L864
+	//
+	//	var yextractps = []ytab{
+	//		{Zibr_m, 2, argList{Yu2, Yxr, Yml}},
+	//	}
+	//
+	//	var ysha1rnds4 = []ytab{
+	//		{Zibm_r, 2, argList{Yu2, Yxm, Yxr}},
+	//	}
+	//
+	case "SHA1RNDS4", "EXTRACTPS":
 		ops[0].Type = "imm2u"
 	}
 
@@ -220,7 +258,7 @@ func datasize(f opcodesxml.Form) int {
 }
 
 func operandsize(op opcodesxml.Operand) int {
-	for s := 8; s <= 256; s *= 2 {
+	for s := 256; s >= 8; s /= 2 {
 		if strings.HasSuffix(op.Type, strconv.Itoa(s)) {
 			return s
 		}
diff --git a/internal/opcodescsv/analysis.go b/internal/opcodescsv/analysis.go
index 9cf8908..3a78dd4 100644
--- a/internal/opcodescsv/analysis.go
+++ b/internal/opcodescsv/analysis.go
@@ -18,15 +18,15 @@ type Alias struct {
 func BuildAliasMap(is []*x86csv.Inst) (map[Alias]string, error) {
 	m := map[Alias]string{}
 	for _, i := range is {
+		if skip(i) {
+			continue
+		}
+
 		s, err := strconv.Atoi("0" + i.DataSize)
 		if err != nil {
 			return nil, err
 		}
 
-		if strings.Contains(i.GoOpcode(), "/") {
-			continue
-		}
-
 		for _, alt := range []string{i.IntelOpcode(), i.GNUOpcode()} {
 			if strings.ToUpper(alt) != i.GoOpcode() {
 				a := Alias{
@@ -41,13 +41,80 @@ func BuildAliasMap(is []*x86csv.Inst) (map[Alias]string, error) {
 	return m, nil
 }
 
-// BuildIntelOrderSet builds the set of instructions that use intel order rather than the usual GNU/AT&T order.
-func BuildIntelOrderSet(is []*x86csv.Inst) map[string]bool {
-	s := map[string]bool{}
+// OperandOrder classifies how an instruction's Go operand order relates to
+// its Intel operand order.
+type OperandOrder uint8
+
+// Possible values of OperandOrder.
+const (
+	// UnknownOrder means no known pattern matched. Being the zero value, it is
+	// also what a map lookup returns for an opcode that was never recorded.
+	UnknownOrder OperandOrder = iota
+	// IntelOrder means the Go operands are in the same order as the Intel operands.
+	IntelOrder
+	// ReverseIntelOrder means the Go operands are the Intel operands reversed.
+	ReverseIntelOrder
+	// CMP3Order is the special case of 3-operand CMPPD, CMPPS, CMPSD and CMPSS.
+	CMP3Order
+)
+
+// BuildOrderMap maps the Go opcode of every non-skipped instruction in is to its operand order.
+func BuildOrderMap(is []*x86csv.Inst) map[string]OperandOrder {
+	s := map[string]OperandOrder{}
 	for _, i := range is {
-		if !reflect.DeepEqual(i.GoArgs(), i.GNUArgs()) {
-			s[i.GoOpcode()] = true
+		if skip(i) {
+			continue
 		}
+		s[i.GoOpcode()] = order(i) // a later row for the same opcode overwrites an earlier one
 	}
 	return s
 }
+
+// order classifies i by comparing its Go arguments with its Intel arguments.
+func order(i *x86csv.Inst) OperandOrder {
+	// Identical argument lists mean the Go form already uses Intel order.
+	intel := i.IntelArgs()
+	if reflect.DeepEqual(i.GoArgs(), intel) {
+		return IntelOrder
+	}
+
+	// Reverse intel in place and compare again to detect reverse Intel order.
+	for l, r := 0, len(intel)-1; l < r; l, r = l+1, r-1 {
+		intel[l], intel[r] = intel[r], intel[l]
+	}
+	if reflect.DeepEqual(i.GoArgs(), intel) {
+		return ReverseIntelOrder
+	}
+
+	// Otherwise we could be in the bizarre special case of 3-argument CMP instructions.
+	//
+	// Reference: https://github.com/golang/arch/blob/b19384d3c130858bb31a343ea8fce26be71b5998/x86/x86spec/format.go#L138-L144
+	//
+	//			case "CMPPD", "CMPPS", "CMPSD", "CMPSS":
+	//				// rotate destination to end but don't swap comparison operands
+	//				if len(args) == 3 {
+	//					args[0], args[1], args[2] = args[2], args[0], args[1]
+	//					break
+	//				}
+	//				fallthrough
+	//
+	switch i.GoOpcode() {
+	case "CMPPD", "CMPPS", "CMPSD", "CMPSS":
+		if len(i.GoArgs()) == 3 {
+			return CMP3Order
+		}
+	}
+
+	return UnknownOrder
+}
+
+// skip reports whether i is excluded from analysis (Go opcode contains "/", or invalid in 64-bit mode).
+func skip(i *x86csv.Inst) bool {
+	switch {
+	case strings.Contains(i.GoOpcode(), "/"):
+		return true
+	case i.Mode64 == "I": // Invalid in 64-bit mode.
+		return true
+	}
+	return false
+}
diff --git a/internal/opcodescsv/analysis_test.go b/internal/opcodescsv/analysis_test.go
new file mode 100644
index 0000000..7f40165
--- /dev/null
+++ b/internal/opcodescsv/analysis_test.go
@@ -0,0 +1,20 @@
+package opcodescsv
+
+import (
+	"testing"
+)
+
+func TestBuildOrderMap(t *testing.T) {
+	is, err := ReadFile("testdata/x86.v0.2.csv")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	orders := BuildOrderMap(is)
+	// Every opcode that made it into the map must have a recognized operand order.
+	for opcode, order := range orders {
+		if order == UnknownOrder {
+			t.Errorf("%s has unknown order", opcode)
+		}
+	}
+}
diff --git a/internal/opcodescsv/testdata b/internal/opcodescsv/testdata
new file mode 120000
index 0000000..4909e06
--- /dev/null
+++ b/internal/opcodescsv/testdata
@@ -0,0 +1 @@
+../data
\ No newline at end of file