ref: c8544c91d9bd6d8094d80881d318cbc6dac899f1
parent: 0fcab2a6e8896ba99c935a9feee798d06ed8fc3b
author: Jacob Moody <moody@posixcafe.org>
date: Wed May 8 13:20:22 EDT 2024
prof: increase precision of measurements Currently we use millisecond ticks for time spent in each function. This is not good enough for modern machines where fast functions could be completed in a handful of nanoseconds. Instead let us just use the raw ticks and store the cyclecfreq in the output prof data. This requires that we enlargen the time section in the data to 8 bytes, which broke the assumptions for struct allignment and required a slight refactor of the code used to read it in prof(1). Since this is a breaking change, we've devised a small format to communicate the version for future revision of this format and this patch includes a modification to file(1) for recognizing this format. Additionally some minor improvements were made across the board.
--- a/sys/man/1/prof
+++ b/sys/man/1/prof
@@ -6,12 +6,8 @@
[
.B -dr
]
-[
.I program
-]
-[
.I profile
-]
.PP
.B tprof
.I pid
@@ -26,16 +22,11 @@
option of
.IR 2l (1)
or other loader.
-The symbol table in the
-named program file
-.RL ( 2.out
-etc., according to
-.BR $objtype ,
-by default)
-is read and correlated with the
-profile file
-.RL ( prof.out
-by default).
+The symbol table in the named
+.I program
+file is read and correlated with the
+.I profile
+file.
For each symbol, the percentage
of time (in seconds) spent executing between that symbol
and the next
@@ -59,7 +50,7 @@
.I symbol
is the entry point of the call,
.I time
-is in milliseconds,
+is in seconds,
and
.I ncall
is the number of times that entry point was called at that
--- a/sys/src/cmd/file.c
+++ b/sys/src/cmd/file.c
@@ -918,6 +918,7 @@
"MThd", "midi audio", 4, "audio/midi",
"MUS\x1a", "mus audio", 4, "audio/mus",
"Creative Voice File\x1a", "voc audio", 20, "audio/x-voc",
+ "pr\x0f", "Plan 9 profile data", 3, OCTET,
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\x00\x00\x00\xbb\x11\x22\x00\x44\xff\xff\xff\xff\xff\xff\xff\xff"
--- a/sys/src/cmd/prof.c
+++ b/sys/src/cmd/prof.c
@@ -13,8 +13,9 @@
ushort right;
ulong pc;
ulong count;
- ulong time;
+ uvlong time;
};
+enum { Datasz = 2+2+4+4+8 };
struct Pc
{
@@ -26,13 +27,13 @@
{
char *name;
ulong pc;
- ulong ms;
- ulong calls;
+ uvlong ticks;
+ uvlong calls;
};
Data* data;
Acc* acc;
-ulong ms;
+uvlong ticks;
long nsym;
long ndata;
int dflag;
@@ -40,6 +41,7 @@
Biobuf bout;
int tabstop = 4;
int verbose;
+uvlong cyclefreq;
void syms(char*);
void datas(char*);
@@ -68,18 +70,15 @@
rflag = 1;
break;
default:
+ usage:
fprint(2, "usage: prof [-dr] [8.out] [prof.out]\n");
exits("usage");
}ARGEND
Binit(&bout, 1, OWRITE);
- if(argc > 0)
- syms(argv[0]);
- else
- syms(defaout());
- if(argc > 1)
- datas(argv[1]);
- else
- datas("prof.out");
+ if(argc < 2)
+ goto usage;
+ syms(argv[0]);
+ datas(argv[1]);
if(ndata){
if(dflag)
graph(0, data[0].down, 0);
@@ -89,26 +88,16 @@
exits(0);
}
-void
-swapdata(Data *dp)
-{
- dp->down = beswab(dp->down);
- dp->right = beswab(dp->right);
- dp->pc = beswal(dp->pc);
- dp->count = beswal(dp->count);
- dp->time = beswal(dp->time);
-}
-
int
acmp(void *va, void *vb)
{
Acc *a, *b;
- ulong ua, ub;
+ uvlong ua, ub;
a = va;
b = vb;
- ua = a->ms;
- ub = b->ms;
+ ua = a->ticks;
+ ub = b->ticks;
if(ua > ub)
return 1;
@@ -123,25 +112,22 @@
Fhdr f;
int fd;
- if((fd = open(cout, 0)) < 0){
- perror(cout);
- exits("open");
- }
- if (!crackhdr(fd, &f)) {
- fprint(2, "can't read text file header\n");
- exits("read");
- }
- if (f.type == FNONE) {
- fprint(2, "text file not an a.out\n");
- exits("file type");
- }
- if (syminit(fd, &f) < 0) {
- fprint(2, "syminit: %r\n");
- exits("syms");
- }
+ if((fd = open(cout, 0)) < 0)
+ sysfatal("%r");
+ if(!crackhdr(fd, &f))
+ sysfatal("can't read text file header: %r");
+ if(f.type == FNONE)
+ sysfatal("text file not an a.out");
+ if(syminit(fd, &f) < 0)
+ sysfatal("syminit: %r");
close(fd);
}
+#define GET2(p) (u16int)(p)[1] | (u16int)(p)[0]<<8
+#define GET4(p) (u32int)(p)[3] | (u32int)(p)[2]<<8 | (u32int)(p)[1]<<16 | (u32int)(p)[0]<<24
+#define GET8(p) (u64int)(p)[7] | (u64int)(p)[6]<<8 | (u64int)(p)[5]<<16 | (u64int)(p)[4]<<24 | \
+ (u64int)(p)[3]<<32 | (u64int)(p)[2]<<40 | (u64int)(p)[1]<<48 | (u64int)(p)[0]<<56
+
void
datas(char *dout)
{
@@ -148,6 +134,7 @@
int fd;
Dir *d;
int i;
+ uchar hdr[3+1+8], *buf, *p;
if((fd = open(dout, 0)) < 0){
perror(dout);
@@ -158,20 +145,29 @@
perror(dout);
exits("stat");
}
- ndata = d->length/sizeof(data[0]);
+ d->length -= sizeof hdr;
+ ndata = d->length/Datasz;
data = malloc(ndata*sizeof(Data));
- if(data == 0){
- fprint(2, "prof: can't malloc data\n");
- exits("data malloc");
+ buf = malloc(d->length);
+ if(buf == 0 || data == 0)
+ sysfatal("malloc");
+ if(read(fd, hdr, sizeof hdr) != sizeof hdr)
+ sysfatal("read data header: %r");
+ if(memcmp(hdr, "pr\x0f", 3) != 0)
+ sysfatal("bad magic");
+ cyclefreq = GET8(hdr+4);
+ if(readn(fd, buf, d->length) != d->length)
+ sysfatal("data file read: %r");
+ for(p = buf, i = 0; i < ndata; i++){
+ data[i].down = GET2(p); p += 2;
+ data[i].right = GET2(p); p += 2;
+ data[i].pc = GET4(p); p += 4;
+ data[i].count = GET4(p); p += 4;
+ data[i].time = GET8(p); p += 8;
}
- if(read(fd, data, d->length) != d->length){
- fprint(2, "prof: can't read data file\n");
- exits("data read");
- }
+ free(buf);
free(d);
close(fd);
- for (i = 0; i < ndata; i++)
- swapdata(data+i);
}
char*
@@ -189,7 +185,8 @@
void
graph(int ind, ulong i, Pc *pc)
{
- long time, count, prgm;
+ long count, prgm;
+ vlong time;
Pc lpc;
if(i >= ndata){
@@ -205,9 +202,9 @@
graph(ind, data[i].right, pc);
indent(ind);
if(count == 1)
- Bprint(&bout, "%s:%lud\n", name(prgm), time);
+ Bprint(&bout, "%s:%.9f\n", name(prgm), (double)time/cyclefreq);
else
- Bprint(&bout, "%s:%lud/%lud\n", name(prgm), time, count);
+ Bprint(&bout, "%s:%.9f/%lud\n", name(prgm), (double)time/cyclefreq, count);
if(data[i].down == 0xFFFF)
return;
lpc.next = pc;
@@ -246,10 +243,11 @@
return -1;
}
-ulong
+double
sum(ulong i)
{
- long j, dtime, time;
+ long j;
+ vlong dtime, time;
int k;
static indent;
@@ -264,7 +262,7 @@
if (verbose){
for(k = 0; k < indent; k++)
print(" ");
- print("%lud: %ld/%lud", i, data[i].time, data[i].count);
+ print("%lud: %llud/%.9f/%lud", i, data[i].time, (double)data[i].time/cyclefreq, data[i].count);
if (j >= 0)
print(" %s\n", acc[j].name);
else
@@ -278,8 +276,8 @@
}
j = symind(data[i].pc);
if (j >= 0) {
- acc[j].ms += time - dtime;
- ms += time - dtime;
+ acc[j].ticks += time - dtime;
+ ticks += time - dtime;
acc[j].calls += data[i].count;
}
if(data[i].right == 0xFFFF)
@@ -300,18 +298,18 @@
}
acc[nsym].name = s.name;
acc[nsym].pc = s.value;
- acc[nsym].calls = acc[nsym].ms = 0;
+ acc[nsym].calls = acc[nsym].ticks = 0;
}
sum(data[0].down);
qsort(acc, nsym, sizeof(Acc), acmp);
- Bprint(&bout, " %% Time Calls Name\n");
- if(ms == 0)
- ms = 1;
+ Bprint(&bout, " %% Time Calls Name\n");
+ if(ticks == 0)
+ ticks = 1;
while (--nsym >= 0) {
if(acc[nsym].calls)
- Bprint(&bout, "%4.1f %8.3f %8lud\t%s\n",
- (100.0*acc[nsym].ms)/ms,
- acc[nsym].ms/1000.0,
+ Bprint(&bout, "%4.1f %.9f %8llud\t%s\n",
+ (100.0*acc[nsym].ticks)/ticks,
+ (double)acc[nsym].ticks/cyclefreq,
acc[nsym].calls,
acc[nsym].name);
}
@@ -329,31 +327,4 @@
}
if(j)
Bwrite(&bout, ". ", j);
-}
-
-char* trans[] =
-{
- "386", "8.out",
- "68020", "2.out",
- "amd64", "6.out",
- "arm", "5.out",
- "mips", "v.out",
- "power", "q.out",
- "sparc", "k.out",
- "spim", "0.out",
- 0,0
-};
-
-char*
-defaout(void)
-{
- char *p;
- int i;
-
- p = getenv("objtype");
- if(p)
- for(i=0; trans[i]; i+=2)
- if(strcmp(p, trans[i]) == 0)
- return trans[i+1];
- return trans[1];
}
--- a/sys/src/libc/port/profile.c
+++ b/sys/src/libc/port/profile.c
@@ -6,9 +6,7 @@
extern uintptr _saveret(void);
extern uintptr _savearg(void);
-static ulong khz;
static ulong perr;
-static int havecycles;
typedef struct Plink Plink;
struct Plink
@@ -19,7 +17,6 @@
long pc;
long count;
vlong time;
- uint rec;
};
#pragma profile off
@@ -45,11 +42,6 @@
pp = _tos->prof.pp;
if(pp == 0 || (_tos->prof.pid && _tos->pid != _tos->prof.pid))
return _restore(arg, ret);
- if(pc == pp->pc){
- pp->rec++;
- p = pp;
- goto out;
- }
for(p=pp->down; p; p=p->link)
if(p->pc == pc)
goto out;
@@ -119,10 +111,7 @@
p->time = p->time + _tos->clock;
break;
}
- if(p->rec)
- p->rec--;
- else
- _tos->prof.pp = p->old;
+ _tos->prof.pp = p->old;
return _restore(arg, ret);
}
@@ -130,10 +119,11 @@
_profdump(void)
{
int f;
- long n;
+ vlong n;
Plink *p;
char *vp;
char filename[64];
+ uchar hdr[3+1+8] = {'p', 'r', 0x0f, 0x2};
if (_tos->prof.what == 0)
return; /* No profiling */
@@ -168,8 +158,17 @@
_tos->prof.first->time = _tos->clock;
break;
}
- vp = (char*)_tos->prof.first;
+ hdr[4+0] = _tos->cyclefreq>>56;
+ hdr[4+1] = _tos->cyclefreq>>48;
+ hdr[4+2] = _tos->cyclefreq>>40;
+ hdr[4+3] = _tos->cyclefreq>>32;
+ hdr[4+4] = _tos->cyclefreq>>24;
+ hdr[4+5] = _tos->cyclefreq>>16;
+ hdr[4+6] = _tos->cyclefreq>>8;
+ hdr[4+7] = _tos->cyclefreq;
+ write(f, hdr, sizeof hdr);
+ vp = (char*)_tos->prof.first;
for(p = _tos->prof.first; p <= _tos->prof.next; p++) {
/*
@@ -214,16 +213,16 @@
/*
* vlong time
*/
- if (havecycles){
- n = (vlong)(p->time / (vlong)khz);
- }else
- n = p->time;
-
- vp[0] = n>>24;
- vp[1] = n>>16;
- vp[2] = n>>8;
- vp[3] = n;
- vp += 4;
+ n = p->time;
+ vp[0] = n>>56;
+ vp[1] = n>>48;
+ vp[2] = n>>40;
+ vp[3] = n>>32;
+ vp[4] = n>>24;
+ vp[5] = n>>16;
+ vp[6] = n>>8;
+ vp[7] = n;
+ vp += 8;
}
write(f, (char*)_tos->prof.first, vp - (char*)_tos->prof.first);
close(f);
@@ -250,10 +249,6 @@
int n, f;
n = 256*1024;
- if (_tos->cyclefreq != 0LL){
- khz = _tos->cyclefreq / 1000; /* Report times in milliseconds */
- havecycles = 1;
- }
f = open("/env/profsize", OREAD|OCEXEC);
if(f >= 0) {
memset(ename, 0, sizeof(ename));