ref: b154a740d6fab72c14c7fd2511e7071d31a9b6e7
dir: /bench.c/
#include <u.h>
#include <tos.h>
#include <libc.h>
#include <bench.h>
/* number of nanoseconds in one second */
#define Nsec 1000000000ULL
/* run time, in nanoseconds, that benchrun wants a measured run to reach */
#define BENCHTIME (Nsec) /* 1s in ns */
/* filled in by benchinit from the NPROC environment variable */
int NPROC;
/*
 * Elapsed nanoseconds since the first call to nanosec().
 *
 * nsec() is wallclock and can be adjusted by timesync, so the
 * cycle counter is preferred whenever the kernel reports its
 * frequency in _tos->cyclefreq; nsec() is only the fallback.
 * The very first call picks the clock, records the origin and
 * returns 0.
 */
uvlong
nanosec(void)
{
	/* fasthz: 0 = not set up yet, ~0 = using nsec(), else cycle frequency */
	static uvlong fasthz, xstart;
	uvlong now, secs, rem;

	if(fasthz == 0){
		if(_tos->cyclefreq == 0){
			fasthz = ~0ULL;
			xstart = nsec();
		}else{
			fasthz = _tos->cyclefreq;
			cycles(&xstart);
		}
		return 0;
	}

	if(fasthz == ~0ULL)
		return nsec() - xstart;

	cycles(&now);
	now -= xstart;
	/* split into whole seconds and remainder before scaling by Nsec */
	secs = now / fasthz;
	rem = now % fasthz;
	return secs*Nsec + rem*Nsec/fasthz;
}
// smaller of x and y
static int
min(int x, int y)
{
	return x > y ? y : x;
}
// larger of x and y
static int
max(int x, int y)
{
	return x < y ? y : x;
}
// run the benchmarking function once, looping n times.
// sets b->N to n, calls b->item.fn(b) and leaves the elapsed
// nanoseconds in b->ns and the elapsed cycles in b->bcycles.
// when b->overheadns / b->overheadcy are not -1 they are
// subtracted from the respective totals.
static void
benchrunn(B *b, int n)
{
	b->N = n;

	// reset: both totals describe this run only. bcycles must be
	// cleared just like ns, otherwise cycles from earlier runs
	// pile up while N only reflects the latest run.
	b->ns = 0;
	b->bcycles = 0;
	b->start = nanosec();
	cycles(&b->scycles);

	b->item.fn(b);

	// stop
	cycles(&b->ecycles);
	b->ns += nanosec() - b->start;
	if(b->overheadns != -1)
		b->ns -= b->overheadns;
	b->bcycles += b->ecycles - b->scycles;
	if(b->overheadcy != -1)
		b->bcycles -= b->overheadcy;
}
// nanoseconds per iteration of the last run; 0 when N is not positive
static vlong
nsperop(B *b)
{
	return b->N > 0 ? b->ns / (vlong)b->N : 0;
}
// cycles per iteration of the last run; 0 when N is not positive
static uvlong
cyperop(B *b)
{
	uvlong per;

	per = 0;
	if(b->N > 0)
		per = b->bcycles / (uvlong)b->N;
	return per;
}
// largest power of ten that is <= n; 1 for any n below 10
static int
rounddown10(int n)
{
	int pow10;

	pow10 = 1;
	// one factor of ten for every digit stripped from n
	for(; n >= 10; n /= 10)
		pow10 *= 10;
	return pow10;
}
// round n up to the next number of the form 1eX, 2eX or 5eX,
// where 1eX is rounddown10(n); anything above 5eX becomes 10eX
static int
roundup(int n)
{
	static int steps[] = { 1, 2, 5 };
	int base, i;

	base = rounddown10(n);
	for(i = 0; i < (int)(sizeof steps / sizeof steps[0]); i++)
		if(n <= steps[i]*base)
			return steps[i]*base;
	return 10*base;
}
// run the benchmark for one function.
// first measures an empty run (N = 0) and stores what it cost in
// b->overheadns / b->overheadcy so benchrunn can subtract it from
// later runs. then starts at N = 1 and keeps growing N until a run
// lasts at least BENCHTIME or N reaches Maxn.
// returns N, ns, cycles and the ns overhead of the last run.
static BResult
benchrun(B *b)
{
	enum { Maxn = 1000000000 };
	int n, last;
	vlong d, perop, want, cap;
	BResult res;

	// -1 tells benchrunn not to subtract any overhead yet
	b->overheadns = -1;
	b->overheadcy = -1;
	// two empty runs; the overhead is read from the state left behind
	benchrunn(b, 0);
	benchrunn(b, 0);
	b->overheadns = b->ns;
	b->overheadcy = b->bcycles;

	n = 1;
	benchrunn(b, n);
	d = BENCHTIME;
	while(b->ns < d && n < Maxn) {
		last = n;
		// predict the iteration count that would fill BENCHTIME
		perop = nsperop(b);
		if(perop == 0)
			want = Maxn;
		else
			want = d / perop;
		// aim 1.5x past the prediction, but grow by at most 100x
		// and by at least one. done in vlong: 100*last does not
		// fit a 32-bit int once last reaches 50,000,000.
		want += want/2;
		cap = 100LL * last;
		if(want > cap)
			want = cap;
		if(want < (vlong)last + 1)
			want = (vlong)last + 1;
		// want is at most 1.5*Maxn here, so it fits in an int
		n = roundup((int)want);
		benchrunn(b, n);
	}

	res.N = b->N;
	res.ns = b->ns;
	res.cycles = b->bcycles;
	res.overhead = b->overheadns;
	return res;
}
// print one result line: iteration count, ns/op, cy/op and the
// total run time in seconds. per-op values below 100 are printed
// as decimals (two places below 10, one place otherwise).
static void
benchres(BResult *res)
{
	char nsbuf[32], cybuf[32];
	vlong nsper;
	uvlong cyper;
	double fns, fcy;

	nsper = 0;
	cyper = 0;
	if(res->N > 0) {
		nsper = res->ns / (vlong)res->N;
		cyper = res->cycles / (uvlong)res->N;
	}

	if(res->N <= 0 || nsper >= 100) {
		snprint(nsbuf, sizeof(nsbuf), "%10lld ns/op", nsper);
	} else {
		fns = (double)res->ns / (double)res->N;
		if(nsper < 10)
			snprint(nsbuf, sizeof(nsbuf), "%13.2f ns/op", fns);
		else
			snprint(nsbuf, sizeof(nsbuf), "%12.1f ns/op", fns);
	}

	if(res->N <= 0 || cyper >= 100) {
		snprint(cybuf, sizeof(cybuf), "%10ulld cy/op", cyper);
	} else {
		fcy = (double)res->cycles / (double)res->N;
		if(cyper < 10)
			snprint(cybuf, sizeof(cybuf), "%13.2f cy/op", fcy);
		else
			snprint(cybuf, sizeof(cybuf), "%12.1f cy/op", fcy);
	}

	// the ns column is cut to 16 characters by the %.16s verb
	print("%10d N %.16s\t%s (total %f s)\n", res->N, nsbuf, cybuf, (double)res->ns / Nsec);
}
/*
* public api
*/
// setup. reads the NPROC environment variable into the global
// NPROC; falls back to 1 when it is unset or does not parse to a
// positive number. the arguments are accepted but not used.
void
benchinit(int, char **)
{
	char *e;

	NPROC = 1;
	e = getenv("NPROC");
	if(e != nil) {
		NPROC = atoi(e);
		free(e);
	}
	// atoi gives 0 for empty or non-numeric text; never report
	// fewer than one process
	if(NPROC < 1)
		NPROC = 1;
}
// bench a single function and print its result line.
// the printed label is name with a leading "bench" removed;
// the B handed to the run keeps the full name.
void
bench(char *name, void (*fn)(B*))
{
	B state;
	BResult out;
	char *label;

	memset(&state, 0, sizeof state);
	memset(&out, 0, sizeof out);
	state.item.name = name;
	state.item.fn = fn;

	label = name;
	if(strncmp(label, "bench", 5) == 0)
		label += 5;
	print("%16s\t", label);

	out = benchrun(&state);
	benchres(&out);
}
// call init, when one is given, and then bench fn under name
void
xbench(char *name, void (*fn)(B*), void (*init)(void))
{
	// a nil init means there is nothing to set up
	if(init != nil)
		init();
	bench(name, fn);
}
// bench the first len entries of items, in order
void
benchitems(BItem items[], int len)
{
	BItem *it;

	for(it = items; len > 0; len--, it++)
		bench(it->name, it->fn);
}