x86: Encode AVX256/AVX512 vpsub[bwdq] with VEX128/EVEX128

Message ID 20180227162017.GA8009@intel.com
State New
Headers show
Series
  • x86: Encode AVX256/AVX512 vpsub[bwdq] with VEX128/EVEX128
Related show

Commit Message

H.J. Lu Feb. 27, 2018, 4:20 p.m.
When 2 source registers are identical, AVX256 and AVX512 vpsub[bwdq]
instructions can be encoded with VEX128 or EVEX128 encodings.

Any comments?

H.J.
gas/

	* config/tc-i386.c (optimize_encoding): Optimize AVX256 and
	AVX512 vpsub[bwdq] instructions.
	* testsuite/gas/i386/optimize-1.s: Add tests for AVX256 and
	AVX512 vpsub[bwdq] instructions.
	* testsuite/gas/i386/x86-64-optimize-2.s: Likewise.
	* testsuite/gas/i386/optimize-1.d: Updated.
	* testsuite/gas/i386/x86-64-optimize-2.d: Likewise.

opcodes/

	* * i386-opc.tbl: Add "Optimize" to AVX256 and AVX512
	vpsub[bwdq] instructions.
	* i386-tbl.h: Regenerated.
---
 gas/config/tc-i386.c                       |  9 ++++++--
 gas/testsuite/gas/i386/optimize-1.d        | 16 +++++++++++++
 gas/testsuite/gas/i386/optimize-1.s        | 20 +++++++++++++++++
 gas/testsuite/gas/i386/x86-64-optimize-2.d | 32 ++++++++++++++++++++++++++
 gas/testsuite/gas/i386/x86-64-optimize-2.s | 36 ++++++++++++++++++++++++++++++
 opcodes/i386-opc.tbl                       | 24 ++++++++++----------
 opcodes/i386-tbl.h                         | 24 ++++++++++----------
 7 files changed, 135 insertions(+), 26 deletions(-)

Comments

H.J. Lu March 1, 2018, 2:04 p.m. | #1
On Tue, Feb 27, 2018 at 8:20 AM, H.J. Lu <hongjiu.lu@intel.com> wrote:
> When 2 source registers are identical, AVX256 and AVX512 vpsub[bwdq]

> instructions can be encoded with VEX128 or EVEX128 encodings.

>

> Any comments?

>

> H.J.

> gas/

>

>         * config/tc-i386.c (optimize_encoding): Optimize AVX256 and

>         AVX512 vpsub[bwdq] instructions.

>         * testsuite/gas/i386/optimize-1.s: Add tests for AVX256 and

>         AVX512 vpsub[bwdq] instructions.

>         * testsuite/gas/i386/x86-64-optimize-2.s: Likewise.

>         * testsuite/gas/i386/optimize-1.d: Updated.

>         * testsuite/gas/i386/x86-64-optimize-2.d: Likewise.

>

> opcodes/

>

>         * * i386-opc.tbl: Add "Optimize" to AVX256 and AVX512

>         vpsub[bwdq] instructions.

>         * i386-tbl.h: Regenerated.


I am checking it in.

-- 
H.J.

Patch

diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c
index f9dccdbd9e..4174d193ad 100644
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -3856,11 +3856,16 @@  optimize_encoding (void)
 		|| i.tm.base_opcode == 0x66df
 		|| i.tm.base_opcode == 0x57
 		|| i.tm.base_opcode == 0x6657
-		|| i.tm.base_opcode == 0x66ef)
+		|| i.tm.base_opcode == 0x66ef
+		|| i.tm.base_opcode == 0x66f8
+		|| i.tm.base_opcode == 0x66f9
+		|| i.tm.base_opcode == 0x66fa
+		|| i.tm.base_opcode == 0x66fb)
 	       && i.tm.extension_opcode == None))
     {
       /* Optimize: -O2:
-	   VOP, one of vandnps, vandnpd, vxorps and vxorpd:
+	   VOP, one of vandnps, vandnpd, vxorps, vxorpd, vpsubb, vpsubd,
+	   vpsubq and vpsubw:
 	     EVEX VOP %zmmM, %zmmM, %zmmN
 	       -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
 	       -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16)
diff --git a/gas/testsuite/gas/i386/optimize-1.d b/gas/testsuite/gas/i386/optimize-1.d
index 80b1e8325f..3ea6e75b9a 100644
--- a/gas/testsuite/gas/i386/optimize-1.d
+++ b/gas/testsuite/gas/i386/optimize-1.d
@@ -42,4 +42,20 @@  Disassembly of section .text:
  +[a-f0-9]+:	62 f1 f5 af ef e9    	vpxorq %ymm1,%ymm1,%ymm5\{%k7\}\{z\}
  +[a-f0-9]+:	c5 f1 ef e9          	vpxor  %xmm1,%xmm1,%xmm5
  +[a-f0-9]+:	c5 f1 ef e9          	vpxor  %xmm1,%xmm1,%xmm5
+ +[a-f0-9]+:	62 f1 75 4f f8 e9    	vpsubb %zmm1,%zmm1,%zmm5\{%k7\}
+ +[a-f0-9]+:	62 f1 75 af f8 e9    	vpsubb %ymm1,%ymm1,%ymm5\{%k7\}\{z\}
+ +[a-f0-9]+:	c5 f1 f8 e9          	vpsubb %xmm1,%xmm1,%xmm5
+ +[a-f0-9]+:	c5 f1 f8 e9          	vpsubb %xmm1,%xmm1,%xmm5
+ +[a-f0-9]+:	62 f1 75 4f f9 e9    	vpsubw %zmm1,%zmm1,%zmm5\{%k7\}
+ +[a-f0-9]+:	62 f1 75 af f9 e9    	vpsubw %ymm1,%ymm1,%ymm5\{%k7\}\{z\}
+ +[a-f0-9]+:	c5 f1 f9 e9          	vpsubw %xmm1,%xmm1,%xmm5
+ +[a-f0-9]+:	c5 f1 f9 e9          	vpsubw %xmm1,%xmm1,%xmm5
+ +[a-f0-9]+:	62 f1 75 4f fa e9    	vpsubd %zmm1,%zmm1,%zmm5\{%k7\}
+ +[a-f0-9]+:	62 f1 75 af fa e9    	vpsubd %ymm1,%ymm1,%ymm5\{%k7\}\{z\}
+ +[a-f0-9]+:	c5 f1 fa e9          	vpsubd %xmm1,%xmm1,%xmm5
+ +[a-f0-9]+:	c5 f1 fa e9          	vpsubd %xmm1,%xmm1,%xmm5
+ +[a-f0-9]+:	62 f1 f5 4f fb e9    	vpsubq %zmm1,%zmm1,%zmm5\{%k7\}
+ +[a-f0-9]+:	62 f1 f5 af fb e9    	vpsubq %ymm1,%ymm1,%ymm5\{%k7\}\{z\}
+ +[a-f0-9]+:	c5 f1 fb e9          	vpsubq %xmm1,%xmm1,%xmm5
+ +[a-f0-9]+:	c5 f1 fb e9          	vpsubq %xmm1,%xmm1,%xmm5
 #pass
diff --git a/gas/testsuite/gas/i386/optimize-1.s b/gas/testsuite/gas/i386/optimize-1.s
index 042a0043cd..f875f92db8 100644
--- a/gas/testsuite/gas/i386/optimize-1.s
+++ b/gas/testsuite/gas/i386/optimize-1.s
@@ -46,3 +46,23 @@  _start:
 	vpxorq %ymm1, %ymm1, %ymm5{z}{%k7}
 	vpxorq %zmm1, %zmm1, %zmm5
 	vpxorq %ymm1, %ymm1, %ymm5
+
+	vpsubb %zmm1, %zmm1, %zmm5{%k7}
+	vpsubb %ymm1, %ymm1, %ymm5{z}{%k7}
+	vpsubb %zmm1, %zmm1, %zmm5
+	vpsubb %ymm1, %ymm1, %ymm5
+
+	vpsubw %zmm1, %zmm1, %zmm5{%k7}
+	vpsubw %ymm1, %ymm1, %ymm5{z}{%k7}
+	vpsubw %zmm1, %zmm1, %zmm5
+	vpsubw %ymm1, %ymm1, %ymm5
+
+	vpsubd %zmm1, %zmm1, %zmm5{%k7}
+	vpsubd %ymm1, %ymm1, %ymm5{z}{%k7}
+	vpsubd %zmm1, %zmm1, %zmm5
+	vpsubd %ymm1, %ymm1, %ymm5
+
+	vpsubq %zmm1, %zmm1, %zmm5{%k7}
+	vpsubq %ymm1, %ymm1, %ymm5{z}{%k7}
+	vpsubq %zmm1, %zmm1, %zmm5
+	vpsubq %ymm1, %ymm1, %ymm5
diff --git a/gas/testsuite/gas/i386/x86-64-optimize-2.d b/gas/testsuite/gas/i386/x86-64-optimize-2.d
index f982f52ba7..f59e9b6808 100644
--- a/gas/testsuite/gas/i386/x86-64-optimize-2.d
+++ b/gas/testsuite/gas/i386/x86-64-optimize-2.d
@@ -74,4 +74,36 @@  Disassembly of section .text:
  +[a-f0-9]+:	62 e1 f5 08 ef c1    	vpxorq %xmm1,%xmm1,%xmm16
  +[a-f0-9]+:	62 b1 f5 00 ef c9    	vpxorq %xmm17,%xmm17,%xmm1
  +[a-f0-9]+:	62 b1 f5 00 ef c9    	vpxorq %xmm17,%xmm17,%xmm1
+ +[a-f0-9]+:	62 71 75 4f f8 f9    	vpsubb %zmm1,%zmm1,%zmm15\{%k7\}
+ +[a-f0-9]+:	62 71 75 af f8 f9    	vpsubb %ymm1,%ymm1,%ymm15\{%k7\}\{z\}
+ +[a-f0-9]+:	c5 71 f8 f9          	vpsubb %xmm1,%xmm1,%xmm15
+ +[a-f0-9]+:	c5 71 f8 f9          	vpsubb %xmm1,%xmm1,%xmm15
+ +[a-f0-9]+:	62 e1 75 08 f8 c1    	vpsubb %xmm1,%xmm1,%xmm16
+ +[a-f0-9]+:	62 e1 75 08 f8 c1    	vpsubb %xmm1,%xmm1,%xmm16
+ +[a-f0-9]+:	62 b1 75 00 f8 c9    	vpsubb %xmm17,%xmm17,%xmm1
+ +[a-f0-9]+:	62 b1 75 00 f8 c9    	vpsubb %xmm17,%xmm17,%xmm1
+ +[a-f0-9]+:	62 71 75 4f f9 f9    	vpsubw %zmm1,%zmm1,%zmm15\{%k7\}
+ +[a-f0-9]+:	62 71 75 af f9 f9    	vpsubw %ymm1,%ymm1,%ymm15\{%k7\}\{z\}
+ +[a-f0-9]+:	c5 71 f9 f9          	vpsubw %xmm1,%xmm1,%xmm15
+ +[a-f0-9]+:	c5 71 f9 f9          	vpsubw %xmm1,%xmm1,%xmm15
+ +[a-f0-9]+:	62 e1 75 08 f9 c1    	vpsubw %xmm1,%xmm1,%xmm16
+ +[a-f0-9]+:	62 e1 75 08 f9 c1    	vpsubw %xmm1,%xmm1,%xmm16
+ +[a-f0-9]+:	62 b1 75 00 f9 c9    	vpsubw %xmm17,%xmm17,%xmm1
+ +[a-f0-9]+:	62 b1 75 00 f9 c9    	vpsubw %xmm17,%xmm17,%xmm1
+ +[a-f0-9]+:	62 71 75 4f fa f9    	vpsubd %zmm1,%zmm1,%zmm15\{%k7\}
+ +[a-f0-9]+:	62 71 75 af fa f9    	vpsubd %ymm1,%ymm1,%ymm15\{%k7\}\{z\}
+ +[a-f0-9]+:	c5 71 fa f9          	vpsubd %xmm1,%xmm1,%xmm15
+ +[a-f0-9]+:	c5 71 fa f9          	vpsubd %xmm1,%xmm1,%xmm15
+ +[a-f0-9]+:	62 e1 75 08 fa c1    	vpsubd %xmm1,%xmm1,%xmm16
+ +[a-f0-9]+:	62 e1 75 08 fa c1    	vpsubd %xmm1,%xmm1,%xmm16
+ +[a-f0-9]+:	62 b1 75 00 fa c9    	vpsubd %xmm17,%xmm17,%xmm1
+ +[a-f0-9]+:	62 b1 75 00 fa c9    	vpsubd %xmm17,%xmm17,%xmm1
+ +[a-f0-9]+:	62 71 f5 4f fb f9    	vpsubq %zmm1,%zmm1,%zmm15\{%k7\}
+ +[a-f0-9]+:	62 71 f5 af fb f9    	vpsubq %ymm1,%ymm1,%ymm15\{%k7\}\{z\}
+ +[a-f0-9]+:	c5 71 fb f9          	vpsubq %xmm1,%xmm1,%xmm15
+ +[a-f0-9]+:	c5 71 fb f9          	vpsubq %xmm1,%xmm1,%xmm15
+ +[a-f0-9]+:	62 e1 f5 08 fb c1    	vpsubq %xmm1,%xmm1,%xmm16
+ +[a-f0-9]+:	62 e1 f5 08 fb c1    	vpsubq %xmm1,%xmm1,%xmm16
+ +[a-f0-9]+:	62 b1 f5 00 fb c9    	vpsubq %xmm17,%xmm17,%xmm1
+ +[a-f0-9]+:	62 b1 f5 00 fb c9    	vpsubq %xmm17,%xmm17,%xmm1
 #pass
diff --git a/gas/testsuite/gas/i386/x86-64-optimize-2.s b/gas/testsuite/gas/i386/x86-64-optimize-2.s
index 6aa968b64e..4461c099ea 100644
--- a/gas/testsuite/gas/i386/x86-64-optimize-2.s
+++ b/gas/testsuite/gas/i386/x86-64-optimize-2.s
@@ -78,3 +78,39 @@  _start:
 	vpxorq %ymm1, %ymm1, %ymm16
 	vpxorq %zmm17, %zmm17, %zmm1
 	vpxorq %ymm17, %ymm17, %ymm1
+
+	vpsubb %zmm1, %zmm1, %zmm15{%k7}
+	vpsubb %ymm1, %ymm1, %ymm15{z}{%k7}
+	vpsubb %zmm1, %zmm1, %zmm15
+	vpsubb %ymm1, %ymm1, %ymm15
+	vpsubb %zmm1, %zmm1, %zmm16
+	vpsubb %ymm1, %ymm1, %ymm16
+	vpsubb %zmm17, %zmm17, %zmm1
+	vpsubb %ymm17, %ymm17, %ymm1
+
+	vpsubw %zmm1, %zmm1, %zmm15{%k7}
+	vpsubw %ymm1, %ymm1, %ymm15{z}{%k7}
+	vpsubw %zmm1, %zmm1, %zmm15
+	vpsubw %ymm1, %ymm1, %ymm15
+	vpsubw %zmm1, %zmm1, %zmm16
+	vpsubw %ymm1, %ymm1, %ymm16
+	vpsubw %zmm17, %zmm17, %zmm1
+	vpsubw %ymm17, %ymm17, %ymm1
+
+	vpsubd %zmm1, %zmm1, %zmm15{%k7}
+	vpsubd %ymm1, %ymm1, %ymm15{z}{%k7}
+	vpsubd %zmm1, %zmm1, %zmm15
+	vpsubd %ymm1, %ymm1, %ymm15
+	vpsubd %zmm1, %zmm1, %zmm16
+	vpsubd %ymm1, %ymm1, %ymm16
+	vpsubd %zmm17, %zmm17, %zmm1
+	vpsubd %ymm17, %ymm17, %ymm1
+
+	vpsubq %zmm1, %zmm1, %zmm15{%k7}
+	vpsubq %ymm1, %ymm1, %ymm15{z}{%k7}
+	vpsubq %zmm1, %zmm1, %zmm15
+	vpsubq %ymm1, %ymm1, %ymm15
+	vpsubq %zmm1, %zmm1, %zmm16
+	vpsubq %ymm1, %ymm1, %ymm16
+	vpsubq %zmm17, %zmm17, %zmm1
+	vpsubq %ymm17, %ymm17, %ymm1
diff --git a/opcodes/i386-opc.tbl b/opcodes/i386-opc.tbl
index 27ac376cbf..dbd8455602 100644
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -2382,14 +2382,14 @@  vpsrlq, 3, 0x6673, 0x2, 1, CpuAVX2, Modrm|Vex=2|VexOpcode=0|VexVVVV=2|VexW=1|Ign
 vpsrlq, 3, 0x66d3, None, 1, CpuAVX2, Modrm|Vex=2|VexOpcode=0|VexVVVV=1|VexW=1|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Xmmword|Unspecified|BaseIndex|RegXMM, RegYMM, RegYMM }
 vpsrlw, 3, 0x6671, 0x2, 1, CpuAVX2, Modrm|Vex=2|VexOpcode=0|VexVVVV=2|VexW=1|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, RegYMM, RegYMM }
 vpsrlw, 3, 0x66d1, None, 1, CpuAVX2, Modrm|Vex=2|VexOpcode=0|VexVVVV=1|VexW=1|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Xmmword|Unspecified|BaseIndex|RegXMM, RegYMM, RegYMM }
-vpsubb, 3, 0x66f8, None, 1, CpuAVX2, Modrm|Vex=2|VexOpcode=0|VexVVVV=1|VexW=1|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Ymmword|Unspecified|BaseIndex|RegYMM, RegYMM, RegYMM }
-vpsubd, 3, 0x66fa, None, 1, CpuAVX2, Modrm|Vex=2|VexOpcode=0|VexVVVV=1|VexW=1|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Ymmword|Unspecified|BaseIndex|RegYMM, RegYMM, RegYMM }
-vpsubq, 3, 0x66fb, None, 1, CpuAVX2, Modrm|Vex=2|VexOpcode=0|VexVVVV=1|VexW=1|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Ymmword|Unspecified|BaseIndex|RegYMM, RegYMM, RegYMM }
+vpsubb, 3, 0x66f8, None, 1, CpuAVX2, Modrm|Vex=2|VexOpcode=0|VexVVVV=1|VexW=1|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Optimize, { Ymmword|Unspecified|BaseIndex|RegYMM, RegYMM, RegYMM }
+vpsubd, 3, 0x66fa, None, 1, CpuAVX2, Modrm|Vex=2|VexOpcode=0|VexVVVV=1|VexW=1|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Optimize, { Ymmword|Unspecified|BaseIndex|RegYMM, RegYMM, RegYMM }
+vpsubq, 3, 0x66fb, None, 1, CpuAVX2, Modrm|Vex=2|VexOpcode=0|VexVVVV=1|VexW=1|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Optimize, { Ymmword|Unspecified|BaseIndex|RegYMM, RegYMM, RegYMM }
 vpsubsb, 3, 0x66e8, None, 1, CpuAVX2, Modrm|Vex=2|VexOpcode=0|VexVVVV=1|VexW=1|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Ymmword|Unspecified|BaseIndex|RegYMM, RegYMM, RegYMM }
 vpsubsw, 3, 0x66e9, None, 1, CpuAVX2, Modrm|Vex=2|VexOpcode=0|VexVVVV=1|VexW=1|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Ymmword|Unspecified|BaseIndex|RegYMM, RegYMM, RegYMM }
 vpsubusb, 3, 0x66d8, None, 1, CpuAVX2, Modrm|Vex=2|VexOpcode=0|VexVVVV=1|VexW=1|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Ymmword|Unspecified|BaseIndex|RegYMM, RegYMM, RegYMM }
 vpsubusw, 3, 0x66d9, None, 1, CpuAVX2, Modrm|Vex=2|VexOpcode=0|VexVVVV=1|VexW=1|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Ymmword|Unspecified|BaseIndex|RegYMM, RegYMM, RegYMM }
-vpsubw, 3, 0x66f9, None, 1, CpuAVX2, Modrm|Vex=2|VexOpcode=0|VexVVVV=1|VexW=1|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Ymmword|Unspecified|BaseIndex|RegYMM, RegYMM, RegYMM }
+vpsubw, 3, 0x66f9, None, 1, CpuAVX2, Modrm|Vex=2|VexOpcode=0|VexVVVV=1|VexW=1|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Optimize, { Ymmword|Unspecified|BaseIndex|RegYMM, RegYMM, RegYMM }
 vpunpckhbw, 3, 0x6668, None, 1, CpuAVX2, Modrm|Vex=2|VexOpcode=0|VexVVVV=1|VexW=1|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Ymmword|Unspecified|BaseIndex|RegYMM, RegYMM, RegYMM }
 vpunpckhdq, 3, 0x666a, None, 1, CpuAVX2, Modrm|Vex=2|VexOpcode=0|VexVVVV=1|VexW=1|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Ymmword|Unspecified|BaseIndex|RegYMM, RegYMM, RegYMM }
 vpunpckhqdq, 3, 0x666d, None, 1, CpuAVX2, Modrm|Vex=2|VexOpcode=0|VexVVVV=1|VexW=1|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Ymmword|Unspecified|BaseIndex|RegYMM, RegYMM, RegYMM }
@@ -3938,7 +3938,7 @@  vpaddd, 3, 0x66FE, None, 1, CpuAVX512F, Modrm|EVex=1|Masking=3|VexOpcode=0|VexVV
 vpandd, 3, 0x66DB, None, 1, CpuAVX512F, Modrm|EVex=1|Masking=3|VexOpcode=0|VexVVVV=1|VexW=1|Broadcast=1|Disp8MemShift=6|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegZMM|Dword|ZMMword|Unspecified|BaseIndex, RegZMM, RegZMM }
 vpandnd, 3, 0x66DF, None, 1, CpuAVX512F, Modrm|EVex=1|Masking=3|VexOpcode=0|VexVVVV=1|VexW=1|Broadcast=1|Disp8MemShift=6|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Optimize, { RegZMM|Dword|ZMMword|Unspecified|BaseIndex, RegZMM, RegZMM }
 vpord, 3, 0x66EB, None, 1, CpuAVX512F, Modrm|EVex=1|Masking=3|VexOpcode=0|VexVVVV=1|VexW=1|Broadcast=1|Disp8MemShift=6|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegZMM|Dword|ZMMword|Unspecified|BaseIndex, RegZMM, RegZMM }
-vpsubd, 3, 0x66FA, None, 1, CpuAVX512F, Modrm|EVex=1|Masking=3|VexOpcode=0|VexVVVV=1|VexW=1|Broadcast=1|Disp8MemShift=6|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegZMM|Dword|ZMMword|Unspecified|BaseIndex, RegZMM, RegZMM }
+vpsubd, 3, 0x66FA, None, 1, CpuAVX512F, Modrm|EVex=1|Masking=3|VexOpcode=0|VexVVVV=1|VexW=1|Broadcast=1|Disp8MemShift=6|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Optimize, { RegZMM|Dword|ZMMword|Unspecified|BaseIndex, RegZMM, RegZMM }
 vpunpckhdq, 3, 0x666A, None, 1, CpuAVX512F, Modrm|EVex=1|Masking=3|VexOpcode=0|VexVVVV=1|VexW=1|Broadcast=1|Disp8MemShift=6|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegZMM|Dword|ZMMword|Unspecified|BaseIndex, RegZMM, RegZMM }
 vpunpckldq, 3, 0x6662, None, 1, CpuAVX512F, Modrm|EVex=1|Masking=3|VexOpcode=0|VexVVVV=1|VexW=1|Broadcast=1|Disp8MemShift=6|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegZMM|Dword|ZMMword|Unspecified|BaseIndex, RegZMM, RegZMM }
 vpxord, 3, 0x66EF, None, 1, CpuAVX512F, Modrm|EVex=1|Masking=3|VexOpcode=0|VexVVVV=1|VexW=1|Broadcast=1|Disp8MemShift=6|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Optimize, { RegZMM|Dword|ZMMword|Unspecified|BaseIndex, RegZMM, RegZMM }
@@ -3948,7 +3948,7 @@  vpandnq, 3, 0x66DF, None, 1, CpuAVX512F, Modrm|EVex=1|Masking=3|VexOpcode=0|VexV
 vpandq, 3, 0x66DB, None, 1, CpuAVX512F, Modrm|EVex=1|Masking=3|VexOpcode=0|VexVVVV=1|VexW=2|VecESize=1|Broadcast=2|Disp8MemShift=6|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegZMM|Qword|ZMMword|Unspecified|BaseIndex, RegZMM, RegZMM }
 vpmuludq, 3, 0x66F4, None, 1, CpuAVX512F, Modrm|EVex=1|Masking=3|VexOpcode=0|VexVVVV=1|VexW=2|VecESize=1|Broadcast=2|Disp8MemShift=6|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegZMM|Qword|ZMMword|Unspecified|BaseIndex, RegZMM, RegZMM }
 vporq, 3, 0x66EB, None, 1, CpuAVX512F, Modrm|EVex=1|Masking=3|VexOpcode=0|VexVVVV=1|VexW=2|VecESize=1|Broadcast=2|Disp8MemShift=6|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegZMM|Qword|ZMMword|Unspecified|BaseIndex, RegZMM, RegZMM }
-vpsubq, 3, 0x66FB, None, 1, CpuAVX512F, Modrm|EVex=1|Masking=3|VexOpcode=0|VexVVVV=1|VexW=2|VecESize=1|Broadcast=2|Disp8MemShift=6|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegZMM|Qword|ZMMword|Unspecified|BaseIndex, RegZMM, RegZMM }
+vpsubq, 3, 0x66FB, None, 1, CpuAVX512F, Modrm|EVex=1|Masking=3|VexOpcode=0|VexVVVV=1|VexW=2|VecESize=1|Broadcast=2|Disp8MemShift=6|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Optimize, { RegZMM|Qword|ZMMword|Unspecified|BaseIndex, RegZMM, RegZMM }
 vpunpckhqdq, 3, 0x666D, None, 1, CpuAVX512F, Modrm|EVex=1|Masking=3|VexOpcode=0|VexVVVV=1|VexW=2|VecESize=1|Broadcast=2|Disp8MemShift=6|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegZMM|Qword|ZMMword|Unspecified|BaseIndex, RegZMM, RegZMM }
 vpunpcklqdq, 3, 0x666C, None, 1, CpuAVX512F, Modrm|EVex=1|Masking=3|VexOpcode=0|VexVVVV=1|VexW=2|VecESize=1|Broadcast=2|Disp8MemShift=6|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegZMM|Qword|ZMMword|Unspecified|BaseIndex, RegZMM, RegZMM }
 vpxorq, 3, 0x66EF, None, 1, CpuAVX512F, Modrm|EVex=1|Masking=3|VexOpcode=0|VexVVVV=1|VexW=2|VecESize=1|Broadcast=2|Disp8MemShift=6|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Optimize, { RegZMM|Qword|ZMMword|Unspecified|BaseIndex, RegZMM, RegZMM }
@@ -4924,7 +4924,7 @@  vpsrld, 3, 0x6672, 2, 1, CpuAVX512F|CpuAVX512VL, Modrm|EVex=2|Masking=3|VexOpcod
 vpsrld, 3, 0x6672, 2, 1, CpuAVX512F|CpuAVX512VL, Modrm|EVex=3|Masking=3|VexOpcode=0|VexVVVV=2|VexW=1|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, RegYMM, RegYMM }
 vpsrld, 3, 0x6672, 2, 1, CpuAVX512F|CpuAVX512VL, Modrm|EVex=3|Masking=3|VexOpcode=0|VexVVVV=3|VexW=1|Broadcast=2|Disp8MemShift=5|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, Dword|YMMword|Unspecified|BaseIndex, RegYMM }
 vpsubd, 3, 0x66FA, None, 1, CpuAVX512F|CpuAVX512VL, Modrm|EVex=2|Masking=3|VexOpcode=0|VexVVVV=1|VexW=1|Broadcast=3|Disp8MemShift=4|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|Dword|XMMword|Unspecified|BaseIndex, RegXMM, RegXMM }
-vpsubd, 3, 0x66FA, None, 1, CpuAVX512F|CpuAVX512VL, Modrm|EVex=3|Masking=3|VexOpcode=0|VexVVVV=1|VexW=1|Broadcast=2|Disp8MemShift=5|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegYMM|Dword|YMMword|Unspecified|BaseIndex, RegYMM, RegYMM }
+vpsubd, 3, 0x66FA, None, 1, CpuAVX512F|CpuAVX512VL, Modrm|EVex=3|Masking=3|VexOpcode=0|VexVVVV=1|VexW=1|Broadcast=2|Disp8MemShift=5|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Optimize, { RegYMM|Dword|YMMword|Unspecified|BaseIndex, RegYMM, RegYMM }
 vpunpckhdq, 3, 0x666A, None, 1, CpuAVX512F|CpuAVX512VL, Modrm|EVex=2|Masking=3|VexOpcode=0|VexVVVV=1|VexW=1|Broadcast=3|Disp8MemShift=4|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|Dword|XMMword|Unspecified|BaseIndex, RegXMM, RegXMM }
 vpunpckhdq, 3, 0x666A, None, 1, CpuAVX512F|CpuAVX512VL, Modrm|EVex=3|Masking=3|VexOpcode=0|VexVVVV=1|VexW=1|Broadcast=2|Disp8MemShift=5|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegYMM|Dword|YMMword|Unspecified|BaseIndex, RegYMM, RegYMM }
 vpunpckldq, 3, 0x6662, None, 1, CpuAVX512F|CpuAVX512VL, Modrm|EVex=2|Masking=3|VexOpcode=0|VexVVVV=1|VexW=1|Broadcast=3|Disp8MemShift=4|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|Dword|XMMword|Unspecified|BaseIndex, RegXMM, RegXMM }
@@ -4963,7 +4963,7 @@  vpsrlq, 3, 0x6673, 2, 1, CpuAVX512F|CpuAVX512VL, Modrm|EVex=2|Masking=3|VexOpcod
 vpsrlq, 3, 0x6673, 2, 1, CpuAVX512F|CpuAVX512VL, Modrm|EVex=3|Masking=3|VexOpcode=0|VexVVVV=2|VexW=2|VecESize=1|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, RegYMM, RegYMM }
 vpsrlq, 3, 0x6673, 2, 1, CpuAVX512F|CpuAVX512VL, Modrm|EVex=3|Masking=3|VexOpcode=0|VexVVVV=3|VexW=2|VecESize=1|Broadcast=3|Disp8MemShift=5|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, Qword|YMMword|Unspecified|BaseIndex, RegYMM }
 vpsubq, 3, 0x66FB, None, 1, CpuAVX512F|CpuAVX512VL, Modrm|EVex=2|Masking=3|VexOpcode=0|VexVVVV=1|VexW=2|VecESize=1|Broadcast=4|Disp8MemShift=4|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|Qword|XMMword|Unspecified|BaseIndex, RegXMM, RegXMM }
-vpsubq, 3, 0x66FB, None, 1, CpuAVX512F|CpuAVX512VL, Modrm|EVex=3|Masking=3|VexOpcode=0|VexVVVV=1|VexW=2|VecESize=1|Broadcast=3|Disp8MemShift=5|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegYMM|Qword|YMMword|Unspecified|BaseIndex, RegYMM, RegYMM }
+vpsubq, 3, 0x66FB, None, 1, CpuAVX512F|CpuAVX512VL, Modrm|EVex=3|Masking=3|VexOpcode=0|VexVVVV=1|VexW=2|VecESize=1|Broadcast=3|Disp8MemShift=5|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Optimize, { RegYMM|Qword|YMMword|Unspecified|BaseIndex, RegYMM, RegYMM }
 vpunpckhqdq, 3, 0x666D, None, 1, CpuAVX512F|CpuAVX512VL, Modrm|EVex=2|Masking=3|VexOpcode=0|VexVVVV=1|VexW=2|VecESize=1|Broadcast=4|Disp8MemShift=4|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|Qword|XMMword|Unspecified|BaseIndex, RegXMM, RegXMM }
 vpunpckhqdq, 3, 0x666D, None, 1, CpuAVX512F|CpuAVX512VL, Modrm|EVex=3|Masking=3|VexOpcode=0|VexVVVV=1|VexW=2|VecESize=1|Broadcast=3|Disp8MemShift=5|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegYMM|Qword|YMMword|Unspecified|BaseIndex, RegYMM, RegYMM }
 vpunpcklqdq, 3, 0x666C, None, 1, CpuAVX512F|CpuAVX512VL, Modrm|EVex=2|Masking=3|VexOpcode=0|VexVVVV=1|VexW=2|VecESize=1|Broadcast=4|Disp8MemShift=4|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|Qword|XMMword|Unspecified|BaseIndex, RegXMM, RegXMM }
@@ -5320,9 +5320,9 @@  vpmaxub, 3, 0x66DE, None, 1, CpuAVX512BW|CpuAVX512VL, Modrm|EVex=3|Masking=3|Vex
 vpminub, 3, 0x66DA, None, 1, CpuAVX512BW, Modrm|EVex=1|Masking=3|VexOpcode=0|VexVVVV=1|Disp8MemShift=6|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegZMM|ZMMword|Unspecified|BaseIndex, RegZMM, RegZMM }
 vpminub, 3, 0x66DA, None, 1, CpuAVX512BW|CpuAVX512VL, Modrm|EVex=2|Masking=3|VexOpcode=0|VexVVVV=1|Disp8MemShift=4|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|XMMword|Unspecified|BaseIndex, RegXMM, RegXMM }
 vpminub, 3, 0x66DA, None, 1, CpuAVX512BW|CpuAVX512VL, Modrm|EVex=3|Masking=3|VexOpcode=0|VexVVVV=1|Disp8MemShift=5|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegYMM|YMMword|Unspecified|BaseIndex, RegYMM, RegYMM }
-vpsubb, 3, 0x66F8, None, 1, CpuAVX512BW, Modrm|EVex=1|Masking=3|VexOpcode=0|VexVVVV=1|Disp8MemShift=6|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegZMM|ZMMword|Unspecified|BaseIndex, RegZMM, RegZMM }
+vpsubb, 3, 0x66F8, None, 1, CpuAVX512BW, Modrm|EVex=1|Masking=3|VexOpcode=0|VexVVVV=1|Disp8MemShift=6|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Optimize, { RegZMM|ZMMword|Unspecified|BaseIndex, RegZMM, RegZMM }
 vpsubb, 3, 0x66F8, None, 1, CpuAVX512BW|CpuAVX512VL, Modrm|EVex=2|Masking=3|VexOpcode=0|VexVVVV=1|Disp8MemShift=4|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|XMMword|Unspecified|BaseIndex, RegXMM, RegXMM }
-vpsubb, 3, 0x66F8, None, 1, CpuAVX512BW|CpuAVX512VL, Modrm|EVex=3|Masking=3|VexOpcode=0|VexVVVV=1|Disp8MemShift=5|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegYMM|YMMword|Unspecified|BaseIndex, RegYMM, RegYMM }
+vpsubb, 3, 0x66F8, None, 1, CpuAVX512BW|CpuAVX512VL, Modrm|EVex=3|Masking=3|VexOpcode=0|VexVVVV=1|Disp8MemShift=5|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Optimize, { RegYMM|YMMword|Unspecified|BaseIndex, RegYMM, RegYMM }
 vpsubsb, 3, 0x66E8, None, 1, CpuAVX512BW, Modrm|EVex=1|Masking=3|VexOpcode=0|VexVVVV=1|Disp8MemShift=6|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegZMM|ZMMword|Unspecified|BaseIndex, RegZMM, RegZMM }
 vpsubsb, 3, 0x66E8, None, 1, CpuAVX512BW|CpuAVX512VL, Modrm|EVex=2|Masking=3|VexOpcode=0|VexVVVV=1|Disp8MemShift=4|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|XMMword|Unspecified|BaseIndex, RegXMM, RegXMM }
 vpsubsb, 3, 0x66E8, None, 1, CpuAVX512BW|CpuAVX512VL, Modrm|EVex=3|Masking=3|VexOpcode=0|VexVVVV=1|Disp8MemShift=5|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegYMM|YMMword|Unspecified|BaseIndex, RegYMM, RegYMM }
@@ -5396,9 +5396,9 @@  vpsubsw, 3, 0x66E9, None, 1, CpuAVX512BW|CpuAVX512VL, Modrm|EVex=3|Masking=3|Vex
 vpsubusw, 3, 0x66D9, None, 1, CpuAVX512BW, Modrm|EVex=1|Masking=3|VexOpcode=0|VexVVVV=1|Disp8MemShift=6|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegZMM|ZMMword|Unspecified|BaseIndex, RegZMM, RegZMM }
 vpsubusw, 3, 0x66D9, None, 1, CpuAVX512BW|CpuAVX512VL, Modrm|EVex=2|Masking=3|VexOpcode=0|VexVVVV=1|Disp8MemShift=4|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|XMMword|Unspecified|BaseIndex, RegXMM, RegXMM }
 vpsubusw, 3, 0x66D9, None, 1, CpuAVX512BW|CpuAVX512VL, Modrm|EVex=3|Masking=3|VexOpcode=0|VexVVVV=1|Disp8MemShift=5|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegYMM|YMMword|Unspecified|BaseIndex, RegYMM, RegYMM }
-vpsubw, 3, 0x66F9, None, 1, CpuAVX512BW, Modrm|EVex=1|Masking=3|VexOpcode=0|VexVVVV=1|Disp8MemShift=6|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegZMM|ZMMword|Unspecified|BaseIndex, RegZMM, RegZMM }
+vpsubw, 3, 0x66F9, None, 1, CpuAVX512BW, Modrm|EVex=1|Masking=3|VexOpcode=0|VexVVVV=1|Disp8MemShift=6|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Optimize, { RegZMM|ZMMword|Unspecified|BaseIndex, RegZMM, RegZMM }
 vpsubw, 3, 0x66F9, None, 1, CpuAVX512BW|CpuAVX512VL, Modrm|EVex=2|Masking=3|VexOpcode=0|VexVVVV=1|Disp8MemShift=4|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|XMMword|Unspecified|BaseIndex, RegXMM, RegXMM }
-vpsubw, 3, 0x66F9, None, 1, CpuAVX512BW|CpuAVX512VL, Modrm|EVex=3|Masking=3|VexOpcode=0|VexVVVV=1|Disp8MemShift=5|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegYMM|YMMword|Unspecified|BaseIndex, RegYMM, RegYMM }
+vpsubw, 3, 0x66F9, None, 1, CpuAVX512BW|CpuAVX512VL, Modrm|EVex=3|Masking=3|VexOpcode=0|VexVVVV=1|Disp8MemShift=5|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Optimize, { RegYMM|YMMword|Unspecified|BaseIndex, RegYMM, RegYMM }
 vpunpckhwd, 3, 0x6669, None, 1, CpuAVX512BW, Modrm|EVex=1|Masking=3|VexOpcode=0|VexVVVV=1|Disp8MemShift=6|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegZMM|ZMMword|Unspecified|BaseIndex, RegZMM, RegZMM }
 vpunpckhwd, 3, 0x6669, None, 1, CpuAVX512BW|CpuAVX512VL, Modrm|EVex=2|Masking=3|VexOpcode=0|VexVVVV=1|Disp8MemShift=4|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|XMMword|Unspecified|BaseIndex, RegXMM, RegXMM }
 vpunpckhwd, 3, 0x6669, None, 1, CpuAVX512BW|CpuAVX512VL, Modrm|EVex=3|Masking=3|VexOpcode=0|VexVVVV=1|Disp8MemShift=5|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegYMM|YMMword|Unspecified|BaseIndex, RegYMM, RegYMM }