ref: 16a2cc57f9cd49f84db08e9c46478d8d0acfdf45
dir: /bench.c/
#include <u.h>
#include <tos.h>
#include <libc.h>
#include <bench.h>
/* nanoseconds per second; nanosec() uses it to turn seconds into ns */
#define Nsec 1000000000ULL
/* benchrun() keeps growing the iteration count until one run lasts this long */
#define BENCHTIME (Nsec) /* 1s in ns */
/* set by benchinit() from the NPROC environment variable; 1 when it is unset */
int NPROC;
/*
 * Nanoseconds elapsed since the first call to nanosec().
 *
 * nsec() is wallclock and can be adjusted by timesync, so the
 * cycle counter is preferred whenever _tos->cyclefreq is set;
 * nsec() is only the fallback when no cycle frequency is known.
 *
 * The very first call picks the time source, records the origin
 * and returns 0.
 */
uvlong
nanosec(void)
{
	static uvlong fasthz, xstart;
	uvlong ticks, secs, rem;

	if(fasthz == 0){
		/* first call: choose the clock and remember where it started */
		if(_tos->cyclefreq){
			fasthz = _tos->cyclefreq;
			cycles(&xstart);
		}else{
			/* ~0 marks "no cycle counter, use nsec()" */
			fasthz = ~0ULL;
			xstart = nsec();
		}
		return 0;
	}

	if(fasthz == ~0ULL)
		return nsec() - xstart;

	cycles(&ticks);
	ticks -= xstart;

	/*
	 * convert whole seconds and the sub-second remainder
	 * separately so ticks*Nsec is never formed in one go
	 */
	secs = ticks / fasthz;
	rem = ticks % fasthz;
	return secs*Nsec + rem*Nsec/fasthz;
}
// smaller of two ints; returns x when they are equal
static int
min(int x, int y)
{
	return (x > y) ? y : x;
}
// larger of two ints; returns x when they are equal
static int
max(int x, int y)
{
	return (x < y) ? y : x;
}
// Run the benchmark function once with b->N set to n.
//
// On return b->ns holds the nanoseconds and b->bcycles the cycles
// spent in this run only.  When an overhead has been recorded
// (b->overheadns / b->overheadcy are not -1) it is subtracted from
// the respective counter, never going below zero.
static void
benchrunn(B *b, int n)
{
	b->N = n;

	// reset: both counters must describe this run alone.  The cycle
	// count is divided by the same N as the time, so it has to be
	// cleared here just like ns; otherwise cycles from the overhead
	// and probing runs leak into the reported cy/op.
	b->ns = 0;
	b->bcycles = 0;
	b->start = nanosec();
	cycles(&b->scycles);

	b->item.fn(b);

	// stop
	cycles(&b->ecycles);
	b->ns += nanosec() - b->start;
	b->bcycles += b->ecycles - b->scycles;

	// remove the measured harness overhead; a run that was faster
	// than the overhead sample counts as zero rather than going
	// negative (ns) or wrapping around (cycles)
	if(b->overheadns != -1){
		if(b->ns > b->overheadns)
			b->ns -= b->overheadns;
		else
			b->ns = 0;
	}
	if(b->overheadcy != -1){
		if(b->bcycles > b->overheadcy)
			b->bcycles -= b->overheadcy;
		else
			b->bcycles = 0;
	}
}
// nanoseconds per iteration of the last run; 0 when no iterations ran
static vlong
nsperop(B *b)
{
	return (b->N > 0) ? b->ns / (vlong)b->N : 0;
}
// cycles per iteration, from b->bcycles and b->N; 0 when no iterations ran
static uvlong
cyperop(B *b)
{
	if(b->N > 0)
		return b->bcycles / (uvlong)b->N;
	return 0;
}
// largest power of ten that is <= n; 1 for any n below 10
static int
rounddown10(int n)
{
	int pow10;

	pow10 = 1;
	// one factor of ten per decimal digit beyond the first
	for(; n >= 10; n /= 10)
		pow10 *= 10;
	return pow10;
}
// round n up to the next number of the form 1eX, 2eX or 5eX
static int
roundup(int n)
{
	static const int mult[] = { 1, 2, 5 };
	int base, i;

	base = rounddown10(n);
	// try the multiples in increasing order and stop at the first
	// that is large enough, so bigger products are never computed
	for(i = 0; i < (int)(sizeof mult / sizeof mult[0]); i++)
		if(n <= mult[i]*base)
			return mult[i]*base;
	return 10*base;
}
// Run the benchmark for one function and return its result.
//
// First the harness overhead is sampled with two zero-iteration
// runs.  Then the function is run with a growing iteration count
// until a single run lasts at least BENCHTIME or the count reaches
// Maxn.  The numbers of the last run are returned.
static BResult
benchrun(B *b)
{
	enum { Maxn = 1000000000 };
	int n, last;
	vlong d, perop, cap;
	BResult res;

	// -1 tells benchrunn not to subtract anything while the
	// overhead itself is being measured
	b->overheadns = -1;
	b->overheadcy = -1;
	benchrunn(b, 0);
	benchrunn(b, 0);
	b->overheadns = b->ns;
	b->overheadcy = b->bcycles;

	n = 1;
	benchrunn(b, n);
	d = BENCHTIME;
	while(b->ns < d && n < Maxn) {
		last = n;

		// predict the iteration count needed to fill BENCHTIME
		perop = nsperop(b);
		if(perop == 0)
			n = Maxn;
		else
			n = (int)(d / perop);

		// 100*last does not fit in an int once last passes about
		// 21 million, so form the growth cap in vlong.  n+n/2 never
		// exceeds 1.5*Maxn, so clamping the cap to 2*Maxn keeps it
		// in int range without changing the outcome of min().
		cap = 100 * (vlong)last;
		if(cap > 2 * (vlong)Maxn)
			cap = 2 * (vlong)Maxn;

		// run 1.5x more than predicted, but grow by at most 100x
		// and by at least one iteration
		n = max(min(n+n/2, (int)cap), last+1);
		n = roundup(n);
		benchrunn(b, n);
	}

	res.N = b->N;
	res.ns = b->ns;
	res.cycles = b->bcycles;
	res.overhead = b->overheadns;
	return res;
}
/*
 * Express ns nanoseconds spent on n operations as a per-operation
 * time in a readable unit.
 *
 * The units are tried from "ns" upwards; the first one in which the
 * per-operation time is below 1000 is chosen, falling back to "h".
 * *unit is set to the chosen unit's name and the scaled value is
 * returned.  With n == 0 there is nothing to divide by, so 0 "ns"
 * is reported.
 */
double
scaletime(vlong ns, vlong n, char **unit)
{
	/*
	 * the divisors must be built in vlong arithmetic: as plain int
	 * expressions the minute and hour values overflow
	 */
	static const struct {
		char *name;
		vlong div;
	} units[] = {
		{"ns", 1LL},
		{"μs", 1000LL},
		{"ms", 1000LL*1000},
		{"s", 1000LL*1000*1000},
		{"m", 60LL*1000*1000*1000},
		{"h", 3600LL*1000*1000*1000},
	};
	int i;

	if(n == 0){
		/* avoid the integer division by zero below */
		*unit = units[0].name;
		return 0;
	}

	for(i = 0; i < nelem(units)-1; i++)
		if(ns / (n * units[i].div) < 1000)
			break;
	*unit = units[i].name;
	return (double)ns / (double)(n*units[i].div);
}
// Print one result line: iteration count, time per op and cycles
// per op.  A result with no iterations is reported as "skipped".
static void
benchres(BResult *res)
{
	char *unit;
	char tmop[64];
	char cyop[32];
	double scaled, cyfrac;
	uvlong cywhole;

	if(res->N <= 0) {
		print("skipped\n");
		return;
	}

	scaled = scaletime(res->ns, (vlong)res->N, &unit);
	snprint(tmop, sizeof(tmop), "%12.2f %s/op", scaled, unit);

	// show fewer decimals as the cycle count grows; from 100 up
	// only the integer part is printed
	cywhole = res->cycles / (uvlong)res->N;
	cyfrac = (double)res->cycles / (double)res->N;
	if(cywhole >= 100)
		snprint(cyop, sizeof(cyop), "%10ulld cy/op", cywhole);
	else if(cywhole >= 10)
		snprint(cyop, sizeof(cyop), "%12.1f cy/op", cyfrac);
	else
		snprint(cyop, sizeof(cyop), "%13.2f cy/op", cyfrac);

	print("%10d N %s\t%s\n", res->N, tmop, cyop);
}
/*
* public api
*/
// setup. reads the NPROC environment variable into the global
// NPROC, using 1 when the variable is not set.
// the arguments are accepted but not used.
void
benchinit(int, char **)
{
	char *env;

	env = getenv("NPROC");
	NPROC = (env != nil) ? atoi(env) : 1;
	// the value is released here; free is also called when env is nil
	free(env);
}
// bench a single function: print its name, run it and print the
// result.  a leading "bench" in the name is left out of the output.
void
bench(char *name, void (*fn)(B*))
{
	B b;
	BResult res;
	char *label;

	memset(&b, 0, sizeof b);
	memset(&res, 0, sizeof res);
	b.item.fn = fn;
	b.item.name = name;

	// the item keeps the full name; only the printed label is shortened
	label = name;
	if(strncmp(label, "bench", 5) == 0)
		label += 5;
	print("%24s\t", label);

	res = benchrun(&b);
	benchres(&res);
}
// bench the first len entries of items, in order
void
benchitems(BItem items[], int len)
{
	for(; len > 0; len--, items++)
		bench(items->name, items->fn);
}