shithub: riscv

Download patch

ref: c8544c91d9bd6d8094d80881d318cbc6dac899f1
parent: 0fcab2a6e8896ba99c935a9feee798d06ed8fc3b
author: Jacob Moody <moody@posixcafe.org>
date: Wed May 8 13:20:22 EDT 2024

prof: increase precision of measurements

Currently we use millisecond ticks for time spent in each function.
This is not good enough for modern machines where fast functions could
be completed in a handful of nanoseconds.  Instead let us just use the
raw ticks and store the cyclecfreq in the output prof data.  This
requires that we enlargen the time section in the data to 8 bytes,
which broke the assumptions for struct allignment and required a
slight refactor of the code used to read it in prof(1).  Since this is
a breaking change, we've devised a small format to communicate the
version for future revision of this format and this patch includes a
modification to file(1) for recognizing this format.  Additionally
some minor improvements were made across the board.

--- a/sys/man/1/prof
+++ b/sys/man/1/prof
@@ -6,12 +6,8 @@
 [
 .B -dr
 ]
-[
 .I program
-]
-[
 .I profile
-]
 .PP
 .B tprof
 .I pid
@@ -26,16 +22,11 @@
 option of
 .IR 2l (1)
 or other loader.
-The symbol table in the
-named program file
-.RL ( 2.out
-etc., according to
-.BR $objtype ,
-by default)
-is read and correlated with the
-profile file
-.RL ( prof.out
-by default).
+The symbol table in the named
+.I program
+file is read and correlated with the
+.I profile
+file.
 For each symbol, the percentage
 of time (in seconds) spent executing between that symbol
 and the next
@@ -59,7 +50,7 @@
 .I symbol
 is the entry point of the call,
 .I time
-is in milliseconds,
+is in seconds,
 and
 .I ncall
 is the number of times that entry point was called at that
--- a/sys/src/cmd/file.c
+++ b/sys/src/cmd/file.c
@@ -918,6 +918,7 @@
 	"MThd",			"midi audio",		4,	"audio/midi",
 	"MUS\x1a",		"mus audio",		4,	"audio/mus",
 	"Creative Voice File\x1a",	"voc audio",	20,	"audio/x-voc",
+	"pr\x0f",		"Plan 9 profile data",	3,	OCTET,
 	"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
 	"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
 	"\x00\x00\x00\xbb\x11\x22\x00\x44\xff\xff\xff\xff\xff\xff\xff\xff"
--- a/sys/src/cmd/prof.c
+++ b/sys/src/cmd/prof.c
@@ -13,8 +13,9 @@
 	ushort	right;
 	ulong	pc;
 	ulong	count;
-	ulong	time;
+	uvlong	time;
 };
+enum { Datasz = 2+2+4+4+8 };
 
 struct Pc
 {
@@ -26,13 +27,13 @@
 {
 	char	*name;
 	ulong	pc;
-	ulong	ms;
-	ulong	calls;
+	uvlong	ticks;
+	uvlong	calls;
 };
 
 Data*	data;
 Acc*	acc;
-ulong	ms;
+uvlong	ticks;
 long	nsym;
 long	ndata;
 int	dflag;
@@ -40,6 +41,7 @@
 Biobuf	bout;
 int	tabstop = 4;
 int	verbose;
+uvlong	cyclefreq;
 
 void	syms(char*);
 void	datas(char*);
@@ -68,18 +70,15 @@
 		rflag = 1;
 		break;
 	default:
+	usage:
 		fprint(2, "usage: prof [-dr] [8.out] [prof.out]\n");
 		exits("usage");
 	}ARGEND
 	Binit(&bout, 1, OWRITE);
-	if(argc > 0)
-		syms(argv[0]);
-	else
-		syms(defaout());
-	if(argc > 1)
-		datas(argv[1]);
-	else
-		datas("prof.out");
+	if(argc < 2)
+		goto usage;
+	syms(argv[0]);
+	datas(argv[1]);
 	if(ndata){
 		if(dflag)
 			graph(0, data[0].down, 0);
@@ -89,26 +88,16 @@
 	exits(0);
 }
 
-void
-swapdata(Data *dp)
-{
-	dp->down = beswab(dp->down);
-	dp->right = beswab(dp->right);
-	dp->pc = beswal(dp->pc);
-	dp->count = beswal(dp->count);
-	dp->time = beswal(dp->time);
-}
-
 int
 acmp(void *va, void *vb)
 {
 	Acc *a, *b;
-	ulong ua, ub;
+	uvlong ua, ub;
 
 	a = va;
 	b = vb;
-	ua = a->ms;
-	ub = b->ms;
+	ua = a->ticks;
+	ub = b->ticks;
 
 	if(ua > ub)
 		return 1;
@@ -123,25 +112,22 @@
 	Fhdr f;
 	int fd;
 
-	if((fd = open(cout, 0)) < 0){
-		perror(cout);
-		exits("open");
-	}
-	if (!crackhdr(fd, &f)) {
-		fprint(2, "can't read text file header\n");
-		exits("read");
-	}
-	if (f.type == FNONE) {
-		fprint(2, "text file not an a.out\n");
-		exits("file type");
-	}
-	if (syminit(fd, &f) < 0) {
-		fprint(2, "syminit: %r\n");
-		exits("syms");
-	}
+	if((fd = open(cout, 0)) < 0)
+		sysfatal("%r");
+	if(!crackhdr(fd, &f))
+		sysfatal("can't read text file header: %r");
+	if(f.type == FNONE)
+		sysfatal("text file not an a.out");
+	if(syminit(fd, &f) < 0)
+		sysfatal("syminit: %r");
 	close(fd);
 }
 
+#define GET2(p) (u16int)(p)[1] | (u16int)(p)[0]<<8
+#define GET4(p) (u32int)(p)[3] | (u32int)(p)[2]<<8 | (u32int)(p)[1]<<16 | (u32int)(p)[0]<<24
+#define GET8(p) (u64int)(p)[7] | (u64int)(p)[6]<<8 | (u64int)(p)[5]<<16 | (u64int)(p)[4]<<24 | \
+		(u64int)(p)[3]<<32 | (u64int)(p)[2]<<40 | (u64int)(p)[1]<<48 | (u64int)(p)[0]<<56
+
 void
 datas(char *dout)
 {
@@ -148,6 +134,7 @@
 	int fd;
 	Dir *d;
 	int i;
+	uchar hdr[3+1+8], *buf, *p;
 
 	if((fd = open(dout, 0)) < 0){
 		perror(dout);
@@ -158,20 +145,29 @@
 		perror(dout);
 		exits("stat");
 	}
-	ndata = d->length/sizeof(data[0]);
+	d->length -= sizeof hdr;
+	ndata = d->length/Datasz;
 	data = malloc(ndata*sizeof(Data));
-	if(data == 0){
-		fprint(2, "prof: can't malloc data\n");
-		exits("data malloc");
+	buf = malloc(d->length);
+	if(buf == 0 || data == 0)
+		sysfatal("malloc");
+	if(read(fd, hdr, sizeof hdr) != sizeof hdr)
+		sysfatal("read data header: %r");
+	if(memcmp(hdr, "pr\x0f", 3) != 0)
+		sysfatal("bad magic");
+	cyclefreq = GET8(hdr+4);
+	if(readn(fd, buf, d->length) != d->length)
+		sysfatal("data file read: %r");
+	for(p = buf, i = 0; i < ndata; i++){
+		data[i].down = GET2(p); p += 2;
+		data[i].right = GET2(p); p += 2;
+		data[i].pc = GET4(p); p += 4;
+		data[i].count = GET4(p); p += 4;
+		data[i].time = GET8(p); p += 8;
 	}
-	if(read(fd, data, d->length) != d->length){
-		fprint(2, "prof: can't read data file\n");
-		exits("data read");
-	}
+	free(buf);
 	free(d);
 	close(fd);
-	for (i = 0; i < ndata; i++)
-		swapdata(data+i);
 }
 
 char*
@@ -189,7 +185,8 @@
 void
 graph(int ind, ulong i, Pc *pc)
 {
-	long time, count, prgm;
+	long count, prgm;
+	vlong time;
 	Pc lpc;
 
 	if(i >= ndata){
@@ -205,9 +202,9 @@
 		graph(ind, data[i].right, pc);
 	indent(ind);
 	if(count == 1)
-		Bprint(&bout, "%s:%lud\n", name(prgm), time);
+		Bprint(&bout, "%s:%.9f\n", name(prgm), (double)time/cyclefreq);
 	else
-		Bprint(&bout, "%s:%lud/%lud\n", name(prgm), time, count);
+		Bprint(&bout, "%s:%.9f/%lud\n", name(prgm), (double)time/cyclefreq, count);
 	if(data[i].down == 0xFFFF)
 		return;
 	lpc.next = pc;
@@ -246,10 +243,11 @@
 	return -1;
 }
 
-ulong
+double
 sum(ulong i)
 {
-	long j, dtime, time;
+	long j;
+	vlong dtime, time;
 	int k;
 	static indent;
 
@@ -264,7 +262,7 @@
 	if (verbose){
 		for(k = 0; k < indent; k++)
 			print("	");
-		print("%lud: %ld/%lud", i, data[i].time, data[i].count);
+		print("%lud: %llud/%.9f/%lud", i, data[i].time, (double)data[i].time/cyclefreq, data[i].count);
 		if (j >= 0)
 			print("	%s\n", acc[j].name);
 		else
@@ -278,8 +276,8 @@
 	}
 	j = symind(data[i].pc);
 	if (j >= 0) {
-		acc[j].ms += time - dtime;
-		ms += time - dtime;
+		acc[j].ticks += time - dtime;
+		ticks += time - dtime;
 		acc[j].calls += data[i].count;
 	}
 	if(data[i].right == 0xFFFF)
@@ -300,18 +298,18 @@
 		}
 		acc[nsym].name = s.name;
 		acc[nsym].pc = s.value;
-		acc[nsym].calls = acc[nsym].ms = 0;
+		acc[nsym].calls = acc[nsym].ticks = 0;
 	}
 	sum(data[0].down);
 	qsort(acc, nsym, sizeof(Acc), acmp);
-	Bprint(&bout, "  %%     Time     Calls  Name\n");
-	if(ms == 0)
-		ms = 1;
+	Bprint(&bout, "  %%       Time        Calls  Name\n");
+	if(ticks == 0)
+		ticks = 1;
 	while (--nsym >= 0) {
 		if(acc[nsym].calls)
-			Bprint(&bout, "%4.1f %8.3f %8lud\t%s\n",
-				(100.0*acc[nsym].ms)/ms,
-				acc[nsym].ms/1000.0,
+			Bprint(&bout, "%4.1f   %.9f %8llud\t%s\n",
+				(100.0*acc[nsym].ticks)/ticks,
+				(double)acc[nsym].ticks/cyclefreq,
 				acc[nsym].calls,
 				acc[nsym].name);
 	}
@@ -329,31 +327,4 @@
 	}
 	if(j)
 		Bwrite(&bout, ".                            ", j);
-}
-
-char*	trans[] =
-{
-	"386",		"8.out",
-	"68020",		"2.out",
-	"amd64",	"6.out",
-	"arm",		"5.out",
-	"mips",		"v.out",
-	"power",		"q.out",
-	"sparc",		"k.out",
-	"spim",		"0.out",
-	0,0
-};
-
-char*
-defaout(void)
-{
-	char *p;
-	int i;
-
-	p = getenv("objtype");
-	if(p)
-	for(i=0; trans[i]; i+=2)
-		if(strcmp(p, trans[i]) == 0)
-			return trans[i+1];
-	return trans[1];
 }
--- a/sys/src/libc/port/profile.c
+++ b/sys/src/libc/port/profile.c
@@ -6,9 +6,7 @@
 extern	uintptr	_saveret(void);
 extern	uintptr	_savearg(void);
 
-static	ulong	khz;
 static	ulong	perr;
-static	int	havecycles;
 
 typedef	struct	Plink	Plink;
 struct	Plink
@@ -19,7 +17,6 @@
 	long	pc;
 	long	count;
 	vlong	time;
-	uint	rec;
 };
 
 #pragma profile off
@@ -45,11 +42,6 @@
 	pp = _tos->prof.pp;
 	if(pp == 0 || (_tos->prof.pid && _tos->pid != _tos->prof.pid))
 		return _restore(arg, ret);
-	if(pc == pp->pc){
-		pp->rec++;
-		p = pp;
-		goto out;
-	}
 	for(p=pp->down; p; p=p->link)
 		if(p->pc == pc)
 			goto out;
@@ -119,10 +111,7 @@
 		p->time = p->time + _tos->clock;
 		break;
 	}
-	if(p->rec)
-		p->rec--;
-	else
-		_tos->prof.pp = p->old;
+	_tos->prof.pp = p->old;
 	return _restore(arg, ret);
 }
 
@@ -130,10 +119,11 @@
 _profdump(void)
 {
 	int f;
-	long n;
+	vlong n;
 	Plink *p;
 	char *vp;
 	char filename[64];
+	uchar hdr[3+1+8] = {'p', 'r', 0x0f, 0x2};
 
 	if (_tos->prof.what == 0)
 		return;	/* No profiling */
@@ -168,8 +158,17 @@
 		_tos->prof.first->time = _tos->clock;
 		break;
 	}
-	vp = (char*)_tos->prof.first;
+	hdr[4+0] = _tos->cyclefreq>>56;
+	hdr[4+1] = _tos->cyclefreq>>48;
+	hdr[4+2] = _tos->cyclefreq>>40;
+	hdr[4+3] = _tos->cyclefreq>>32;
+	hdr[4+4] = _tos->cyclefreq>>24;
+	hdr[4+5] = _tos->cyclefreq>>16;
+	hdr[4+6] = _tos->cyclefreq>>8;
+	hdr[4+7] = _tos->cyclefreq;
+	write(f, hdr, sizeof hdr);
 
+	vp = (char*)_tos->prof.first;
 	for(p = _tos->prof.first; p <= _tos->prof.next; p++) {
 
 		/*
@@ -214,16 +213,16 @@
 		/*
 		 * vlong time
 		 */
-		if (havecycles){
-			n = (vlong)(p->time / (vlong)khz);
-		}else
-			n = p->time;
-
-		vp[0] = n>>24;
-		vp[1] = n>>16;
-		vp[2] = n>>8;
-		vp[3] = n;
-		vp += 4;
+		n = p->time;
+		vp[0] = n>>56;
+		vp[1] = n>>48;
+		vp[2] = n>>40;
+		vp[3] = n>>32;
+		vp[4] = n>>24;
+		vp[5] = n>>16;
+		vp[6] = n>>8;
+		vp[7] = n;
+		vp += 8;
 	}
 	write(f, (char*)_tos->prof.first, vp - (char*)_tos->prof.first);
 	close(f);
@@ -250,10 +249,6 @@
 	int n, f;
 
 	n = 256*1024;
-	if (_tos->cyclefreq != 0LL){
-		khz = _tos->cyclefreq / 1000;	/* Report times in milliseconds */
-		havecycles = 1;
-	}
 	f = open("/env/profsize", OREAD|OCEXEC);
 	if(f >= 0) {
 		memset(ename, 0, sizeof(ename));