ref: ad7120b2e2f0ed0d226eec1b5ef6fc6b3eebb4ef
parent: 6ecb921719c6cb64711de9a3f4c1e2334fd352b8
author: rodri <rgl@antares-labs.eu>
date: Wed Nov 29 16:04:59 EST 2023
6[acl], libmach: add 3-byte opcode encoding support for SSE[34] along with some instructions
--- a/sys/src/cmd/6a/lex.c
+++ b/sys/src/cmd/6a/lex.c
@@ -780,6 +780,10 @@
"ADDPS", LTYPE3, AADDPS,
"ADDSD", LTYPE3, AADDSD,
"ADDSS", LTYPE3, AADDSS,
+ "HADDPD", LTYPE3, AHADDPD,
+ "HADDPS", LTYPE3, AHADDPS,
+ "ADDSUBPD", LTYPE3, AADDSUBPD,
+ "ADDSUBPS", LTYPE3, AADDSUBPS,
"ANDNPD", LTYPE3, AANDNPD,
"ANDNPS", LTYPE3, AANDNPS,
"ANDPD", LTYPE3, AANDPD,
@@ -960,6 +964,8 @@
"SUBPS", LTYPE3, ASUBPS,
"SUBSD", LTYPE3, ASUBSD,
"SUBSS", LTYPE3, ASUBSS,
+ "HSUBPD", LTYPE3, AHSUBPD,
+ "HSUBPS", LTYPE3, AHSUBPS,
"UCOMISD", LTYPE3, AUCOMISD,
"UCOMISS", LTYPE3, AUCOMISS,
"UNPCKHPD", LTYPE3, AUNPCKHPD,
@@ -968,6 +974,12 @@
"UNPCKLPS", LTYPE3, AUNPCKLPS,
"XORPD", LTYPE3, AXORPD,
"XORPS", LTYPE3, AXORPS,
+ "ROUNDSD", LTYPEX, AROUNDSD,
+ "ROUNDSS", LTYPEX, AROUNDSS,
+ "ROUNDPD", LTYPEX, AROUNDPD,
+ "ROUNDPS", LTYPEX, AROUNDPS,
+ "DPPD", LTYPEX, ADPPD,
+ "DPPS", LTYPEX, ADPPS,
0
};
--- a/sys/src/cmd/6c/6.out.h
+++ b/sys/src/cmd/6c/6.out.h
@@ -700,6 +700,18 @@
AMODE,
AMOVQL,
+ AHADDPD,
+ AHADDPS,
+ AHSUBPD,
+ AHSUBPS,
+ AADDSUBPD,
+ AADDSUBPS,
+ AROUNDSD,
+ AROUNDSS,
+ AROUNDPD,
+ AROUNDPS,
+ ADPPD,
+ ADPPS,
ALAST
};
--- a/sys/src/cmd/6l/optab.c
+++ b/sys/src/cmd/6l/optab.c
@@ -395,6 +395,11 @@
Yxm, Yxr, Zm_r_xm, 2,
0
};
+uchar yxmi3[] = /* operand form used by the 0x0f,0x3a-escaped ROUNDxx/DPPx entries in optab */
+{
+ Yxm, Yxr, Zm_r_i_xm, 3, /* NOTE(review): 3 presumably counts the op bytes consumed (0x0f, escape, opcode) - confirm against doasm */
+ 0
+};
uchar yxr[] =
{
Yxr, Yxr, Zm_r_xm, 1,
@@ -522,6 +527,10 @@
{ AADDSD, yxm, Pf2, 0x58 },
{ AADDSS, yxm, Pf3, 0x58 },
{ AADDW, yaddl, Pe, 0x83,(00),0x05,0x81,(00),0x01,0x03 },
+ { AHADDPD, yxm, Pq, 0x7c },
+ { AHADDPS, yxm, Pf2, 0x7c },
+ { AADDSUBPD, yxm, Pq, 0xd0 },
+ { AADDSUBPS, yxm, Pf2, 0xd0 },
{ AADJSP },
{ AANDB, yxorb, Pb, 0x24,0x80,(04),0x20,0x22 },
{ AANDL, yxorl, Px, 0x83,(04),0x25,0x81,(04),0x21,0x23 },
@@ -1011,6 +1020,8 @@
{ ASUBSD, yxm, Pf2, 0x5c },
{ ASUBSS, yxm, Pf3, 0x5c },
{ ASUBW, yaddl, Pe, 0x83,(05),0x2d,0x81,(05),0x29,0x2b },
+ { AHSUBPD, yxm, Pq, 0x7d },
+ { AHSUBPS, yxm, Pf2, 0x7d },
{ ASWAPGS, ynone, Pm, 0x01,0xf8 },
{ ASYSCALL, ynone, Px, 0x0f,0x05 }, /* fast syscall */
{ ATESTB, ytestb, Pb, 0xa8,0xf6,(00),0x84,0x84 },
@@ -1039,6 +1050,12 @@
{ AXORPS, yxm, Pm, 0x57 },
{ AXORQ, yxorl, Pw, 0x83,(06),0x35,0x81,(06),0x31,0x33 },
{ AXORW, yxorl, Pe, 0x83,(06),0x35,0x81,(06),0x31,0x33 },
+ { AROUNDSD, yxmi3, Pe, 0x0f,0x3a,0x0b },
+ { AROUNDSS, yxmi3, Pe, 0x0f,0x3a,0x0a },
+ { AROUNDPD, yxmi3, Pe, 0x0f,0x3a,0x09 },
+ { AROUNDPS, yxmi3, Pe, 0x0f,0x3a,0x08 },
+ { ADPPD, yxmi3, Pe, 0x0f,0x3a,0x41 },
+ { ADPPS, yxmi3, Pe, 0x0f,0x3a,0x40 },
{ AFMOVB, yfmvx, Px, 0xdf,(04) },
{ AFMOVBP, yfmvp, Px, 0xdf,(06) },
--- a/sys/src/cmd/6l/span.c
+++ b/sys/src/cmd/6l/span.c
@@ -1014,6 +1014,10 @@
default:
if(andptr == and || andptr[-1] != Pm)
*andptr++ = Pm;
+ if(op == 0x38 || op == 0x3a){ /* 3-byte opcode escapes */
+ *andptr++ = op;
+ op = o->op[++z];
+ }
break;
}
*andptr++ = op;
--- a/sys/src/libmach/8db.c
+++ b/sys/src/libmach/8db.c
@@ -379,7 +379,12 @@
};
static Optable optab660F3A[128] = {
+[0x08] RM,Ib, "ROUNDPS %i,%x,%X", /* 0x08-0x0B, 0x40, 0x41: SSE4.1; keep entries in ascending opcode order */
+[0x09] RM,Ib, "ROUNDPD %i,%x,%X",
+[0x0A] RM,Ib, "ROUNDSS %i,%x,%X",
+[0x0B] RM,Ib, "ROUNDSD %i,%x,%X",
+[0x40] RM,Ib, "DPPS %i,%x,%X",
[0x41] RM,Ib, "DPPD %i,%x,%X",
};
static Optable optab660F71[8]=
@@ -410,7 +415,7 @@
[0x2E] RM,0, "UCOMISD %x,%X",
[0x2F] RM,0, "COMISD %x,%X",
[0x38] AUX,0, optab660F38,
-[0x3A] AUXMM,0, optab660F3A, /* SSE4 */
+[0x3A] AUXMM,0, optab660F3A, /* SSSE3/SSE4 */
[0x5A] RM,0, "CVTPD2PS %x,%X",
[0x5B] RM,0, "CVTPS2PL %x,%X",
[0x6A] RM,0, "PUNPCKHLQ %x,%X",
@@ -423,6 +428,7 @@
[0x71] RMOP,0, optab660F71,
[0x72] RMOP,0, optab660F72,
[0x73] RMOP,0, optab660F73,
+[0x7C] RM,0, "HADDPD %x,%X",
[0x7D] RM,0, "HSUBPD %x,%X",
[0x7E] RM,0, "MOV%S %X,%e",
[0x7F] RM,0, "MOVO %X,%x",
@@ -429,6 +435,7 @@
[0xC4] RM,Ib, "PINSRW %i,%e,%X",
[0xC5] RMR,Ib, "PEXTRW %i,%X,%e",
[0xC7] RMM,0, "VMCLEAR %e",
+[0xD0] RM,0, "ADDSUBPD %x,%X",
[0xD4] RM,0, "PADDQ %x,%X",
[0xD5] RM,0, "PMULLW %x,%X",
[0xD6] RM,0, "MOVQ %X,%x",
@@ -447,7 +454,10 @@
[0x5A] RM,0, "CVTSD2SS %x,%X",
[0x6F] RM,0, "MOVOU %x,%X",
[0x70] RM,Ib, "PSHUFLW %i,%x,%X",
+[0x7C] RM,0, "HADDPS %x,%X",
+[0x7D] RM,0, "HSUBPS %x,%X",
[0x7F] RM,0, "MOVOU %X,%x",
+[0xD0] RM,0, "ADDSUBPS %x,%X",
[0xD6] RM,0, "MOVQOZX %M,%X",
[0xE6] RM,0, "CVTPD2PL %x,%X",
};