ref: 3c04d813ab19bd5b08cf7857b17976517de2ee71
dir: /bench.c/
#include <u.h>
#include <tos.h>
#include <libc.h>
#include <bench.h>
/* nanoseconds per second */
#define Nsec 1000000000ULL
/* time budget roundup() divides by the per-op estimate; also its iteration cap */
#define BENCHTIME (Nsec) /* 1s in ns */
/*
 * Histogram of per-run times, filled in by histrecord().
 * Bucket i counts samples whose nanosecond value halves i times
 * before reaching 1 or less (i.e. roughly log2 of the sample).
 */
typedef struct Hist Hist;
struct Hist {
/* lowest bucket index recorded so far; -1 while nothing was recorded */
int min;
/* one past the highest bucket index recorded so far; -1 while empty */
int max;
/* largest count held by any single bucket, used to scale the bars */
int bktmax;
/* sample counts per bucket; the last bucket also takes anything larger */
int bkt[64];
};
/* set by benchinit() from the NPROC environment variable, 1 when unset */
int NPROC;
/* incremented by the 'h' option in benchinit(); non-zero makes bench() collect and print a histogram */
int showhist;
/*
 * Nanoseconds elapsed since the first call; the first call itself
 * only records the starting point and returns 0.
 *
 * nsec() is wallclock time and can be adjusted by timesync, so the
 * cycle counter is used whenever _tos->cyclefreq is known; nsec()
 * is the fallback when it is not.
 */
uvlong
nanosec(void)
{
	/* fasthz: 0 = not initialised, ~0 = nsec() fallback, else cycle frequency */
	static uvlong fasthz, xstart;
	uvlong now, secs, frac;

	if(fasthz == 0){
		if(_tos->cyclefreq){
			fasthz = _tos->cyclefreq;
			cycles(&xstart);
		}else{
			fasthz = ~0ULL;
			xstart = nsec();
		}
		return 0;
	}
	if(fasthz == ~0ULL)
		return nsec() - xstart;

	cycles(&now);
	now -= xstart;
	/*
	 * convert whole seconds and the sub-second remainder separately
	 * so the multiplication by Nsec does not overflow early
	 */
	secs = now / fasthz;
	frac = now % fasthz;
	return secs*Nsec + frac*Nsec/fasthz;
}
/* smaller of x and y */
static int
min(int x, int y)
{
	return x > y ? y : x;
}
/* larger of x and y */
static int
max(int x, int y)
{
	return x < y ? y : x;
}
/*
 * Add the time of the run just finished (b->ns) to histogram h.
 * Does nothing when h is nil.
 */
void
histrecord(Hist *h, B *b)
{
	vlong v;
	int slot;

	if(h == nil)
		return;

	/*
	 * slot is the number of halvings needed to bring the sample
	 * down to 1 or less, clamped to the last bucket; samples of
	 * 1 or less (including negative ones) land in slot 0
	 */
	slot = 0;
	for(v = b->ns; slot < nelem(h->bkt) - 1 && v > 1; v >>= 1)
		slot++;

	if(++h->bkt[slot] > h->bktmax)
		h->bktmax = h->bkt[slot];

	/* keep [min, max) as the range of occupied slots; -1 means nothing recorded yet */
	if(h->min == -1 || slot < h->min)
		h->min = slot;
	if(h->max == -1 || slot >= h->max)
		h->max = slot + 1;
}
// run the benchmarking function once, looping n times
//
// On return b->ns and b->bcycles describe this invocation only (with the
// current overhead estimates subtracted) and b->N is n, so the per-op
// figures derived from them are consistent.  The sample is also added to
// the histogram h when h is not nil.
static void
benchrunn(B *b, int n, Hist *h)
{
	// reset: both counters start from zero, otherwise cycles from
	// earlier invocations would be divided by this run's N
	b->ns = 0;
	b->bcycles = 0;
	b->start = nanosec();
	cycles(&b->scycles);
	b->N = n;
	b->item.fn(b);
	// count; += keeps whatever fn itself left in the counters
	// (presumably fn-side adjustments -- confirm against bench.h)
	cycles(&b->ecycles);
	b->ns += nanosec() - b->start - b->overheadns;
	b->bcycles += b->ecycles - b->scycles - b->overheadcy;
	histrecord(h, b);
}
/* nanoseconds per iteration for the last run; 0 when no iteration was requested */
static vlong
nsperop(B *b)
{
	return b->N > 0 ? b->ns / (vlong)b->N : 0;
}
/* cycles per iteration for the last run; 0 when no iteration was requested */
static uvlong
cyperop(B *b)
{
	return b->N > 0 ? b->bcycles / (uvlong)b->N : 0;
}
/*
 * Largest power of ten that is not greater than n
 * (1 for any n below 10).
 */
static int
rounddown10(vlong n)
{
	int scale;

	scale = 1;
	for(; n >= 10; n /= 10)
		scale *= 10;
	return scale;
}
/*
 * Pick an iteration count for a run that should last about ns
 * nanoseconds when one iteration takes div nanoseconds: ns/div
 * rounded up to the next value of the form 1, 2 or 5 times a
 * power of ten, never less than 5 and never more than BENCHTIME.
 *
 * A zero or negative div (the estimate can end up there once the
 * overhead has been subtracted) yields the cap.  This used to work
 * for most negative values only because BENCHTIME is unsigned and
 * the comparison promoted the negative quotient to a huge number.
 */
static int
roundup(vlong ns, vlong div)
{
	vlong n;
	int base;

	if(div <= 0)
		return BENCHTIME;
	n = ns / div;
	if(n < 0 || n >= (vlong)BENCHTIME)
		return BENCHTIME;
	if(n <= 5)
		return 5;

	base = rounddown10(n);
	if(n <= base)
		return base;
	if(n <= 2*base)
		return 2*base;
	if(n <= 5*base)
		return 5*base;
	return 10*base;
}
// run the benchmark for one function
//
// Steps: warm up, estimate the fixed cost of an empty run, time a single
// iteration to choose the iteration count n, then do the measured run.
// On return b->ns and b->bcycles are the totals of the measured run and
// b->N is the number of iterations they cover.  With a histogram the
// measured run is n separate single-iteration runs, each recorded in h.
static void
benchrun(B *b, Hist *h)
{
	int i, n;
	vlong minns, totns;
	uvlong mincy, totcy;

	b->overheadns = 0;
	b->overheadcy = 0;

	/* warm caches */
	benchrunn(b, 0, nil);

	/*
	 * measure overhead: smallest of 20 empty runs.
	 * The minimum is tracked in locals so that overheadns and
	 * overheadcy stay zero while sampling; benchrunn subtracts
	 * them, and a non-zero value here would turn every sample
	 * after the first into a difference against the estimate.
	 */
	minns = 0;
	mincy = 0;
	for(i = 0; i < 20; i++){
		b->ns = 0;
		b->bcycles = 0;
		benchrunn(b, 0, nil);
		if(i == 0 || b->ns < minns)
			minns = b->ns;
		if(i == 0 || b->bcycles < mincy)
			mincy = b->bcycles;
	}
	b->overheadns = minns;
	b->overheadcy = mincy;

	/* estimate */
	benchrunn(b, 1, h);
	n = roundup(BENCHTIME, b->ns);
	print("%10d N ", n);

	/* do the run */
	if(h == nil){
		/* drop the cycles of the overhead and estimate runs */
		b->bcycles = 0;
		benchrunn(b, n, nil);
		return;
	}

	/*
	 * one sample per iteration for the histogram; sum them up so
	 * the reported per-op values cover all n samples and not just
	 * the last one
	 */
	totns = 0;
	totcy = 0;
	for(i = 0; i < n; i++){
		b->bcycles = 0;
		benchrunn(b, 1, h);
		totns += b->ns;
		totcy += b->bcycles;
	}
	b->ns = totns;
	b->bcycles = totcy;
	b->N = n;
}
/*
 * Convert a total of ns nanoseconds spent on n operations into a
 * per-operation time expressed in a readable unit.
 *
 * Returns the scaled value and stores the unit name ("ns", "μs",
 * "ms", "s", "m" or "h") in *unit.  The first unit in which the
 * integer per-op value drops below 1000 is chosen, "h" being the
 * last resort.  For n <= 0 there is nothing to divide by: the
 * result is 0 in "ns".
 */
double
scaletime(vlong ns, vlong n, char **unit)
{
	/*
	 * the divisors are spelled with LL so the products are computed
	 * in 64 bits; as plain int constants the "m" and "h" entries
	 * overflow
	 */
	static const struct {
		char *name;
		vlong div;
	} units[] = {
		{"ns", 1LL},
		{"μs", 1000LL},
		{"ms", 1000LL*1000},
		{"s", 1000LL*1000*1000},
		{"m", 60LL*1000*1000*1000},
		{"h", 3600LL*1000*1000*1000},
	};
	int i;

	if(n <= 0){
		*unit = units[0].name;
		return 0;
	}
	for(i = 0; i < nelem(units)-1; i++)
		if(ns / (n * units[i].div) < 1000)
			break;
	*unit = units[i].name;
	return (double)ns / (double)(n*units[i].div);
}
/*
 * Print the result line for a finished benchmark: time per op in a
 * scaled unit and cycles per op, then, when h is not nil, one line
 * per occupied histogram range with the bucket's lower bound, its
 * sample count and a bar of at most 30 stars.
 * Prints "skipped" when no iterations were run.
 */
static void
benchres(B *res, Hist *h)
{
	char *unit;
	char bar[64];
	char tmop[64];
	char cyop[32];
	int i, j, lim;
	double tm, cyf;
	uvlong cy;

	if(res->N <= 0) {
		print("skipped\n");
		return;
	}

	tm = scaletime(res->ns, (vlong)res->N, &unit);
	snprint(tmop, sizeof(tmop), "%12.2f %s/op", tm, unit);

	/* show fractional digits only while the cycle count is small */
	cy = res->bcycles / (uvlong)res->N;
	cyf = (double)res->bcycles / (double)res->N;
	if(cy < 10)
		snprint(cyop, sizeof(cyop), "%13.2f cy/op", cyf);
	else if(cy < 100)
		snprint(cyop, sizeof(cyop), "%12.1f cy/op", cyf);
	else
		snprint(cyop, sizeof(cyop), "%10ulld cy/op", cy);
	print("%s\t%s\n", tmop, cyop);

	if(h == nil)
		return;

	for(i = h->min; i < h->max; i++){
		/*
		 * bar length: the raw count while every bucket is below 30,
		 * otherwise scaled so the fullest bucket gets 30 stars.
		 * The product is formed in 64 bits; 30*count does not fit
		 * an int once a bucket holds more than about 71 million
		 * samples.
		 */
		if(h->bktmax < 30)
			lim = h->bkt[i];
		else
			lim = (vlong)30*h->bkt[i] / h->bktmax;
		/* a non-empty bucket always shows at least one star */
		if(lim == 0 && h->bkt[i] > 0)
			lim = 1;
		for(j = 0; j < lim; j++)
			bar[j] = '*';
		bar[j] = 0;
		print("\t%12lld | %8d %s\n", 1LL<<i, h->bkt[i], bar);
	}
}
/*
 * Print the command synopsis on standard error and exit with
 * status "usage".  The only option benchinit() accepts is -h.
 */
static void
usage(void)
{
	fprint(2, "usage: %s [-h]\n", argv0);
	exits("usage");
}
/*
 * Set up the benchmark globals from the environment and the
 * command line: NPROC comes from the NPROC environment variable
 * (1 when it is not set) and each -h bumps showhist.  Any other
 * option prints the usage message and exits.
 */
void
benchinit(int argc, char **argv)
{
	char *env;

	env = getenv("NPROC");
	NPROC = env == nil ? 1 : atoi(env);
	/* the string handed back by getenv is released here; free(nil) is fine */
	free(env);

	ARGBEGIN{
	case 'h':
		showhist += 1;
		break;
	default:
		usage();
	}ARGEND;
}
// bench a single function
//
// Prints the name (without a leading "bench"), runs fn through benchrun
// and prints the result; a histogram is collected and shown when
// showhist is set.
void
bench(char *name, void (*fn)(B*))
{
	B b;
	Hist hist;
	Hist *hp;

	memset(&b, 0, sizeof(B));
	b.item.name = name;
	b.item.fn = fn;

	/* empty histogram: all counts zero, no occupied range yet */
	memset(&hist, 0, sizeof(Hist));
	hist.min = -1;
	hist.max = -1;
	hp = showhist ? &hist : nil;

	/* the item keeps the full name, only the printed label is shortened */
	if(strncmp(name, "bench", 5) == 0)
		name += 5;
	print("%24s\t", name);

	benchrun(&b, hp);
	benchres(&b, hp);
}
// bench an array of functions
//
// Runs bench() on the first len entries of items, in order.
void
benchitems(BItem items[], int len)
{
	for(; len > 0; len--, items++)
		bench(items->name, items->fn);
}