shithub: riscv

Download patch

ref: ad7120b2e2f0ed0d226eec1b5ef6fc6b3eebb4ef
parent: 6ecb921719c6cb64711de9a3f4c1e2334fd352b8
author: rodri <rgl@antares-labs.eu>
date: Wed Nov 29 16:04:59 EST 2023

6[acl], libmach: add 3-byte opcode encoding support for SSE[34] along with some instructions

--- a/sys/src/cmd/6a/lex.c
+++ b/sys/src/cmd/6a/lex.c
@@ -780,6 +780,10 @@
 	"ADDPS",	LTYPE3,	AADDPS,
 	"ADDSD",	LTYPE3,	AADDSD,
 	"ADDSS",	LTYPE3,	AADDSS,
+	"HADDPD",	LTYPE3,	AHADDPD,
+	"HADDPS",	LTYPE3,	AHADDPS,
+	"ADDSUBPD",	LTYPE3,	AADDSUBPD,
+	"ADDSUBPS",	LTYPE3,	AADDSUBPS,
 	"ANDNPD",	LTYPE3,	AANDNPD,
 	"ANDNPS",	LTYPE3,	AANDNPS,
 	"ANDPD",	LTYPE3,	AANDPD,
@@ -960,6 +964,8 @@
 	"SUBPS",	LTYPE3,	ASUBPS,
 	"SUBSD",	LTYPE3,	ASUBSD,
 	"SUBSS",	LTYPE3,	ASUBSS,
+	"HSUBPD",	LTYPE3,	AHSUBPD,
+	"HSUBPS",	LTYPE3,	AHSUBPS,
 	"UCOMISD",	LTYPE3,	AUCOMISD,
 	"UCOMISS",	LTYPE3,	AUCOMISS,
 	"UNPCKHPD",	LTYPE3,	AUNPCKHPD,
@@ -968,6 +974,12 @@
 	"UNPCKLPS",	LTYPE3,	AUNPCKLPS,
 	"XORPD",	LTYPE3,	AXORPD,
 	"XORPS",	LTYPE3,	AXORPS,
+	"ROUNDSD",	LTYPEX,	AROUNDSD,
+	"ROUNDSS",	LTYPEX,	AROUNDSS,
+	"ROUNDPD",	LTYPEX,	AROUNDPD,
+	"ROUNDPS",	LTYPEX,	AROUNDPS,
+	"DPPD",		LTYPEX,	ADPPD,
+	"DPPS",		LTYPEX,	ADPPS,
 
 	0
 };
--- a/sys/src/cmd/6c/6.out.h
+++ b/sys/src/cmd/6c/6.out.h
@@ -700,6 +700,18 @@
 	AMODE,
 	
 	AMOVQL,
+	AHADDPD,
+	AHADDPS,
+	AHSUBPD,
+	AHSUBPS,
+	AADDSUBPD,
+	AADDSUBPS,
+	AROUNDSD,
+	AROUNDSS,
+	AROUNDPD,
+	AROUNDPS,
+	ADPPD,
+	ADPPS,
 
 	ALAST
 };
--- a/sys/src/cmd/6l/optab.c
+++ b/sys/src/cmd/6l/optab.c
@@ -395,6 +395,11 @@
 	Yxm,	Yxr,	Zm_r_xm,	2,
 	0
 };
+uchar	yxmi3[] =	/* operand pattern referenced by the AROUND* and ADPP* opcode table entries */
+{
+	Yxm,	Yxr,	Zm_r_i_xm,	3,	/* NOTE(review): presumably xmm-or-memory source, xmm register destination, immediate operand; the 3 appears to match the three opcode bytes (0x0f,0x3a,op) those entries list -- confirm against the loader's encoding loop */
+	0	/* terminates the pattern list */
+};
 uchar	yxr[] = 
 {
 	Yxr,	Yxr,	Zm_r_xm,	1,
@@ -522,6 +527,10 @@
 	{ AADDSD,	yxm,	Pf2, 0x58 },
 	{ AADDSS,	yxm,	Pf3, 0x58 },
 	{ AADDW,	yaddl,	Pe, 0x83,(00),0x05,0x81,(00),0x01,0x03 },
+	{ AHADDPD,	yxm,	Pq, 0x7c },
+	{ AHADDPS,	yxm,	Pf2, 0x7c },
+	{ AADDSUBPD,	yxm,	Pq, 0xd0 },
+	{ AADDSUBPS,	yxm,	Pf2, 0xd0 },
 	{ AADJSP },
 	{ AANDB,	yxorb,	Pb, 0x24,0x80,(04),0x20,0x22 },
 	{ AANDL,	yxorl,	Px, 0x83,(04),0x25,0x81,(04),0x21,0x23 },
@@ -1011,6 +1020,8 @@
 	{ ASUBSD,	yxm,	Pf2, 0x5c },
 	{ ASUBSS,	yxm,	Pf3, 0x5c },
 	{ ASUBW,	yaddl,	Pe, 0x83,(05),0x2d,0x81,(05),0x29,0x2b },
+	{ AHSUBPD,	yxm,	Pq, 0x7d },
+	{ AHSUBPS,	yxm,	Pf2, 0x7d },
 	{ ASWAPGS,	ynone,	Pm, 0x01,0xf8 },
 	{ ASYSCALL,	ynone,	Px, 0x0f,0x05 },	/* fast syscall */
 	{ ATESTB,	ytestb,	Pb, 0xa8,0xf6,(00),0x84,0x84 },
@@ -1039,6 +1050,12 @@
 	{ AXORPS,	yxm,	Pm, 0x57 },
 	{ AXORQ,	yxorl,	Pw, 0x83,(06),0x35,0x81,(06),0x31,0x33 },
 	{ AXORW,	yxorl,	Pe, 0x83,(06),0x35,0x81,(06),0x31,0x33 },
+	{ AROUNDSD,	yxmi3,	Pe, 0x0f,0x3a,0x0b },
+	{ AROUNDSS,	yxmi3,	Pe, 0x0f,0x3a,0x0a },
+	{ AROUNDPD,	yxmi3,	Pe, 0x0f,0x3a,0x09 },
+	{ AROUNDPS,	yxmi3,	Pe, 0x0f,0x3a,0x08 },
+	{ ADPPD,	yxmi3,	Pe, 0x0f,0x3a,0x41 },
+	{ ADPPS,	yxmi3,	Pe, 0x0f,0x3a,0x40 },
 
 	{ AFMOVB,	yfmvx,	Px, 0xdf,(04) },
 	{ AFMOVBP,	yfmvp,	Px, 0xdf,(06) },
--- a/sys/src/cmd/6l/span.c
+++ b/sys/src/cmd/6l/span.c
@@ -1014,6 +1014,10 @@
 	default:
 		if(andptr == and || andptr[-1] != Pm)
 			*andptr++ = Pm;
+		if(op == 0x38 || op == 0x3a){ /* 3-byte opcode escapes */
+			*andptr++ = op;
+			op = o->op[++z];
+		}
 		break;
 	}
 	*andptr++ = op;
--- a/sys/src/libmach/8db.c
+++ b/sys/src/libmach/8db.c
@@ -379,7 +379,12 @@
 };
 
 static Optable optab660F3A[128] = {
+[0x08]	RM,Ib,		"ROUNDPS	%i,%x,%X",
+[0x09]	RM,Ib,		"ROUNDPD	%i,%x,%X",
+[0x0A]	RM,Ib,		"ROUNDSS	%i,%x,%X",
+[0x0B]	RM,Ib,		"ROUNDSD	%i,%x,%X",
 [0x41]	RM,Ib,		"DPPD	%i,%x,%X",
+[0x40]	RM,Ib,		"DPPS	%i,%x,%X",
 };
 
 static Optable optab660F71[8]=
@@ -410,7 +415,7 @@
 [0x2E]	RM,0,		"UCOMISD	%x,%X",
 [0x2F]	RM,0,		"COMISD	%x,%X",
 [0x38]	AUX,0,		optab660F38,
-[0x3A]	AUXMM,0,	optab660F3A,		/* SSE4 */
+[0x3A]	AUXMM,0,	optab660F3A,		/* SSSE3/SSE4 */
 [0x5A]	RM,0,		"CVTPD2PS	%x,%X",
 [0x5B]	RM,0,		"CVTPS2PL	%x,%X",
 [0x6A]	RM,0,		"PUNPCKHLQ %x,%X",
@@ -423,6 +428,7 @@
 [0x71]	RMOP,0,		optab660F71,
 [0x72]	RMOP,0,		optab660F72,
 [0x73]	RMOP,0,		optab660F73,
+[0x7C]	RM,0,		"HADDPD	%x,%X",
 [0x7D]	RM,0,		"HSUBPD	%x,%X",
 [0x7E]	RM,0,		"MOV%S	%X,%e",
 [0x7F]	RM,0,		"MOVO	%X,%x",
@@ -429,6 +435,7 @@
 [0xC4]	RM,Ib,		"PINSRW	%i,%e,%X",
 [0xC5]	RMR,Ib,		"PEXTRW	%i,%X,%e",
 [0xC7]	RMM,0,		"VMCLEAR	%e",
+[0xD0]	RM,0,		"ADDSUBPD	%x,%X",
 [0xD4]	RM,0,		"PADDQ	%x,%X",
 [0xD5]	RM,0,		"PMULLW	%x,%X",
 [0xD6]	RM,0,		"MOVQ	%X,%x",
@@ -447,7 +454,10 @@
 [0x5A]	RM,0,		"CVTSD2SS	%x,%X",
 [0x6F]	RM,0,		"MOVOU	%x,%X",
 [0x70]	RM,Ib,		"PSHUFLW	%i,%x,%X",
+[0x7C]	RM,0,		"HADDPS	%x,%X",
+[0x7D]	RM,0,		"HSUBPS	%x,%X",
 [0x7F]	RM,0,		"MOVOU	%X,%x",
+[0xD0]	RM,0,		"ADDSUBPS	%x,%X",
 [0xD6]	RM,0,		"MOVQOZX	%M,%X",
 [0xE6]	RM,0,		"CVTPD2PL	%x,%X",
 };