shithub: bench9

Download patch

ref: bbdcad5c85ca1a0daa21bf1f8f70ea9394cb7a02
parent: cce6dfb94a75af67751e2fab0d961a004e9e51d5
author: Sigrid Solveig Haflínudóttir <ftrvxmtrx@gmail.com>
date: Sat Oct 31 20:39:40 EDT 2020

use proper serialization; use RDTSCP if possible

--- a/b.c
+++ b/b.c
@@ -13,6 +13,19 @@
 
 static uvlong adj;
 
+typedef struct Res {
+	ulong ax, bx, cx, dx;
+}Res;
+
+Res cpuid(ulong ax, ulong cx);
+
+void _tstart(uvlong *c);
+void _tend(uvlong *c);
+void _tstartp(uvlong *c);
+void _tendp(uvlong *c);
+void (*tstart)(uvlong *c);
+void (*tend)(uvlong *c);
+
 uvlong
 cycles2ns(uvlong x)
 {
@@ -47,10 +60,23 @@
 void
 benchinit(B *b, char *name)
 {
+	Res r;
 	int i;
 
 	fmtinstall(L'σ', σfmt);
 	fmtinstall(L'τ', τfmt);
+
+	if(tstart == nil){
+		r = cpuid(0x80000001, 2);
+		if((r.dx & (1<<27)) != 0){
+			tstart = _tstartp;
+			tend = _tendp;
+		}else{
+			tstart = _tstart;
+			tend = _tend;
+		}
+	}
+
 	memset(b, 0, sizeof(*b));
 	b->name = name;
 	b->n = b->nc = Bstepmin;
--- a/b.h
+++ b/b.h
@@ -24,11 +24,11 @@
 
 /* private */
 void benchstep(B *b);
-void _tstart(uvlong *c);
-void _tend(uvlong *c);
+extern void (*tstart)(uvlong *c);
+extern void (*tend)(uvlong *c);
 
 /* public */
 void benchinit(B *b, char *name);
 void benchprint(B *b, int nb, int fd);
-#define benchin(b) do{ _tstart(&(b)->tin); }while(0)
-#define benchout(b) do{ _tend(&(b)->tout); benchstep(b); }while(0)
+#define benchin(b) do{ tstart(&(b)->tin); }while(0)
+#define benchout(b) do{ tend(&(b)->tout); benchstep(b); }while(0)
--- a/b_amd64.s
+++ b/b_amd64.s
@@ -1,5 +1,5 @@
 TEXT _tstart(SB), 1, $0
-	LFENCE
+	CPUID
 	RDTSC
 	MOVL AX, 0(RARG)
 	MOVL DX, 4(RARG)
@@ -6,8 +6,32 @@
 	RET
 
 TEXT _tend(SB), 1, $0
+	CPUID
 	RDTSC
 	MOVL AX, 0(RARG)
 	MOVL DX, 4(RARG)
+	RET
+
+TEXT _tstartp(SB), 1, $0
 	CPUID
+	WORD $0x010f; BYTE $0xf9 // RDTSCP
+	MOVL AX, 0(RARG)
+	MOVL DX, 4(RARG)
+	RET
+
+TEXT _tendp(SB), 1, $0
+	WORD $0x010f; BYTE $0xf9 // RDTSCP
+	MOVL AX, 0(RARG)
+	MOVL DX, 4(RARG)
+	CPUID
+	RET
+
+TEXT cpuid(SB), 1, $0
+	MOVL ax+8(FP), AX
+	MOVL cx+16(FP), CX
+	CPUID
+	MOVL AX, 0(BP)
+	MOVL BX, 4(BP)
+	MOVL CX, 8(BP)
+	MOVL DX, 12(BP)
 	RET