ref: fb2c8083f028676d0c46e0d9a89de78b3f129552
parent: d36e762e12d108fec8cd19071b22909b4466812a
author: rodri <rgl@antares-labs.eu>
date: Sat Sep 7 14:39:36 EDT 2024
remove unnecessary copying. profile individual stage procs.
--- a/camera.c
+++ b/camera.c
@@ -87,17 +87,6 @@
}
static void
-updatetimes(Camera *c, Renderjob *j)
-{
- c->times.R[c->times.cur] = j->times.R;
- c->times.E[c->times.cur] = j->times.E;
- c->times.Tn[c->times.cur] = j->times.Tn;
- c->times.Rn[c->times.cur] = j->times.Rn;
- c->times.last = c->times.cur;
- c->times.cur = ++c->times.cur % nelem(c->times.R);
-}
-
-static void
verifycfg(Camera *c)
{
assert(c->view != nil);
@@ -208,6 +197,45 @@
c->by = crossvec3(c->bz, c->bx);
}
+/*
+ * printtimings writes the profiling timestamps recorded in job to fd 2:
+ * one "<stage> <t0> <t1>" line for the renderer (R), the entityproc (E),
+ * each tiler (Tn) and each rasterizer (rn), then an empty line.
+ * It does nothing unless the job's renderer has doprof set.
+ */
+static void
+printtimings(Renderjob *job)
+{
+	ulong nworkers;
+	int i;
+
+	if(!job->rctl->doprof)
+		return;
+
+	/*
+	 * entityproc halves nprocs only when it exceeds 2, so nprocs/2
+	 * would print no tiler/rasterizer slots for nprocs = 1 and just
+	 * one of each for nprocs = 2.  Derive the count the same way,
+	 * and never read past the fixed-size timing arrays.
+	 */
+	nworkers = job->rctl->nprocs;
+	if(nworkers > 2)
+		nworkers /= 2;
+	if(nworkers > nelem(job->times.Tn))
+		nworkers = nelem(job->times.Tn);
+
+	fprint(2, "R %llud %llud\nE %llud %llud\n",
+		job->times.R.t0, job->times.R.t1,
+		job->times.E.t0, job->times.E.t1);
+	for(i = 0; i < nworkers; i++)
+		fprint(2, "T%d %llud %llud\n", i,
+			job->times.Tn[i].t0, job->times.Tn[i].t1);
+	for(i = 0; i < nworkers; i++)
+		fprint(2, "r%d %llud %llud\n", i,
+			job->times.Rn[i].t0, job->times.Rn[i].t1);
+	fprint(2, "\n");
+}
+
void
shootcamera(Camera *c, Shadertab *s)
{
@@ -224,6 +233,7 @@
job = emalloc(sizeof *job);
memset(job, 0, sizeof *job);
+ job->rctl = c->rctl;
job->fb = fbctl->getbb(fbctl);
job->camera = emalloc(sizeof *c);
*job->camera = *c;
@@ -233,7 +243,7 @@
fbctl->reset(fbctl, c->clearcolor);
t0 = nanosec();
- sendp(c->rctl->c, job);
+ sendp(c->rctl->jobq, job);
recvp(job->donec);
delscene(job->scene); /* destroy the snapshot */
/*
@@ -251,7 +261,7 @@
reloadcamera(job->camera);
job->scene = dupscene(skyboxscene);
job->shaders = &skyboxshader;
- sendp(c->rctl->c, job);
+ sendp(c->rctl->jobq, job);
recvp(job->donec);
delscene(job->scene);
}
@@ -259,7 +269,9 @@
fbctl->swap(fbctl);
updatestats(c, t1-t0);
- updatetimes(c, job);
+ printtimings(job);
+// free(job->times.Tn);
+// free(job->times.Rn);
chanfree(job->donec);
free(job->camera);
--- a/clip.c
+++ b/clip.c
@@ -108,6 +108,7 @@
d1 = (j&1) == 0? sd1[j]: -sd1[j];
perc = d0/(d0 - d1);
+ memset(&v, 0, sizeof v);
lerpvertex(&v, v0, v1, perc);
addvert(Vout, v);
@@ -191,6 +192,8 @@
Point3 dp;
Point Δp;
double len, perc;
+
+ memset(v, 0, sizeof v);
dp = subpt3(v1->p, v0->p);
len = hypot(dp.x, dp.y);
--- a/graphics.h
+++ b/graphics.h
@@ -56,8 +56,8 @@
typedef struct Shaderparams Shaderparams;
typedef struct SUparams SUparams;
typedef struct Shadertab Shadertab;
-typedef struct Renderer Renderer;
typedef struct Rendertime Rendertime;
+typedef struct Renderer Renderer;
typedef struct Renderjob Renderjob;
typedef struct Fragment Fragment;
typedef struct Astk Astk;
@@ -231,14 +231,16 @@
Color (*fshader)(Shaderparams*); /* fragment shader */
};
-struct Renderer
+struct Rendertime
{
- Channel *c;
+ uvlong t0, t1;
};
-struct Rendertime
+struct Renderer
{
- uvlong t0, t1;
+ Channel *jobq;
+ ulong nprocs;
+ int doprof; /* enable profiling */
};
struct Renderjob
@@ -245,6 +247,7 @@
{
Ref;
uvlong id;
+ Renderer *rctl;
Framebuf *fb;
Camera *camera;
Scene *scene;
@@ -252,7 +255,8 @@
Channel *donec;
struct {
- Rendertime R, E, Tn, Rn; /* renderer, entityproc, tilers, rasterizers */
+ /* renderer, entityproc, tilers, rasterizers */
+ Rendertime R, E, Tn[20], Rn[20];
} times;
Renderjob *next;
@@ -356,10 +360,6 @@
uvlong min, avg, max, acc, n, v;
uvlong nframes;
} stats;
- struct {
- Rendertime R[10], E[10], Tn[10], Rn[10];
- int last, cur;
- } times;
};
/* camera */
--- a/internal.h
+++ b/internal.h
@@ -1,4 +1,5 @@
typedef struct Polygon Polygon;
+typedef struct Entityparam Entityparam;
typedef struct Tilerparam Tilerparam;
typedef struct Rasterparam Rasterparam;
typedef struct Rastertask Rastertask;
@@ -8,6 +9,12 @@
Vertex *v;
ulong n;
ulong cap;
+};
+
+struct Entityparam	/* startup arguments renderer() hands to entityproc() */
+{
+ Renderer *rctl;	/* renderer control; entityproc reads nprocs from it */
+ Channel *paramsc;	/* channel on which renderer() sends SUparams* to entityproc() */
};
struct Tilerparam
--- a/render.c
+++ b/render.c
@@ -191,7 +191,7 @@
{
SUparams *params;
Raster *cr, *zr;
- Primitive prim;
+ Primitive *prim;
Vertex v;
Shaderparams fsp;
Triangle2 t;
@@ -204,8 +204,9 @@
int steep = 0, Δe, e, Δy;
params = task->params;
- prim = task->p;
+ prim = &task->p;
memset(&fsp, 0, sizeof fsp);
+ memset(&v, 0, sizeof v);
fsp.su = params;
fsp.v = &v;
fsp.getuniform = sparams_getuniform;
@@ -216,11 +217,11 @@
cr = params->fb->rasters;
zr = cr->next;
- switch(prim.type){
+ switch(prim->type){
case PPoint:
- p = Pt(prim.v[0].p.x, prim.v[0].p.y);
+ p = Pt(prim->v[0].p.x, prim->v[0].p.y);
- z = fclamp(prim.v[0].p.z, 0, 1);
+ z = fclamp(prim->v[0].p.z, 0, 1);
if(params->camera->enabledepth){
if(z <= getdepth(zr, p))
break;
@@ -227,7 +228,7 @@
putdepth(zr, p, z);
}
- *fsp.v = dupvertex(&prim.v[0]);
+ fsp.v = &prim->v[0];
fsp.p = p;
c = params->fshader(&fsp);
if(params->camera->enableAbuff)
@@ -237,10 +238,10 @@
delvattrs(fsp.v);
break;
case PLine:
- p0 = Pt(prim.v[0].p.x, prim.v[0].p.y);
- p1 = Pt(prim.v[1].p.x, prim.v[1].p.y);
+ p0 = Pt(prim->v[0].p.x, prim->v[0].p.y);
+ p1 = Pt(prim->v[1].p.x, prim->v[1].p.y);
/* clip it against our wr */
- if(rectclipline(task->wr, &p0, &p1, &prim.v[0], &prim.v[1]) < 0)
+ if(rectclipline(task->wr, &p0, &p1, &prim->v[0], &prim->v[1]) < 0)
break;
/* transpose the points */
@@ -253,7 +254,7 @@
/* make them left-to-right */
if(p0.x > p1.x){
SWAP(Point, &p0, &p1);
- SWAP(Vertex, &prim.v[0], &prim.v[1]);
+ SWAP(Vertex, &prim->v[0], &prim->v[1]);
}
dp = subpt(p1, p0);
@@ -268,7 +269,7 @@
if(steep) SWAP(int, &p.x, &p.y);
- z = flerp(prim.v[0].p.z, prim.v[1].p.z, perc);
+ z = flerp(prim->v[0].p.z, prim->v[1].p.z, perc);
/* TODO get rid of the bounds check and make sure the clipping doesn't overflow */
if(params->camera->enabledepth){
if(!ptinrect(p, params->fb->r) || z <= getdepth(zr, p))
@@ -277,12 +278,12 @@
}
/* interpolate z⁻¹ and get actual z */
- pcz = flerp(prim.v[0].p.w, prim.v[1].p.w, perc);
+ pcz = flerp(prim->v[0].p.w, prim->v[1].p.w, perc);
pcz = 1.0/(pcz < 1e-5? 1e-5: pcz);
/* perspective-correct attribute interpolation */
- perc *= prim.v[0].p.w * pcz;
- lerpvertex(fsp.v, &prim.v[0], &prim.v[1], perc);
+ perc *= prim->v[0].p.w * pcz;
+ lerpvertex(fsp.v, &prim->v[0], &prim->v[1], perc);
fsp.p = p;
c = params->fshader(&fsp);
@@ -290,7 +291,6 @@
pushtoAbuf(params->fb, p, c, z);
else
pixel(cr, p, c, params->camera->enableblend);
- delvattrs(fsp.v);
discard:
if(steep) SWAP(int, &p.x, &p.y);
@@ -300,20 +300,18 @@
e -= 2*dp.x;
}
}
+ delvattrs(fsp.v);
break;
case PTriangle:
- t.p0 = Pt2(prim.v[0].p.x, prim.v[0].p.y, 1);
- t.p1 = Pt2(prim.v[1].p.x, prim.v[1].p.y, 1);
- t.p2 = Pt2(prim.v[2].p.x, prim.v[2].p.y, 1);
+ t.p0 = Pt2(prim->v[0].p.x, prim->v[0].p.y, 1);
+ t.p1 = Pt2(prim->v[1].p.x, prim->v[1].p.y, 1);
+ t.p2 = Pt2(prim->v[2].p.x, prim->v[2].p.y, 1);
/* find the triangle's bbox and clip it against our wr */
bbox.min.x = min(min(t.p0.x, t.p1.x), t.p2.x);
bbox.min.y = min(min(t.p0.y, t.p1.y), t.p2.y);
bbox.max.x = max(max(t.p0.x, t.p1.x), t.p2.x)+1;
bbox.max.y = max(max(t.p0.y, t.p1.y), t.p2.y)+1;
- bbox.min.x = max(bbox.min.x, task->wr.min.x);
- bbox.min.y = max(bbox.min.y, task->wr.min.y);
- bbox.max.x = min(bbox.max.x, task->wr.max.x);
- bbox.max.y = min(bbox.max.y, task->wr.max.y);
+ rectclip(&bbox, task->wr);
for(p.y = bbox.min.y; p.y < bbox.max.y; p.y++)
for(p.x = bbox.min.x; p.x < bbox.max.x; p.x++){
@@ -321,7 +319,7 @@
if(bc.x < 0 || bc.y < 0 || bc.z < 0)
continue;
- z = fberp(prim.v[0].p.z, prim.v[1].p.z, prim.v[2].p.z, bc);
+ z = fberp(prim->v[0].p.z, prim->v[1].p.z, prim->v[2].p.z, bc);
if(params->camera->enabledepth){
if(z <= getdepth(zr, p))
continue;
@@ -329,14 +327,14 @@
}
/* interpolate z⁻¹ and get actual z */
- pcz = fberp(prim.v[0].p.w, prim.v[1].p.w, prim.v[2].p.w, bc);
+ pcz = fberp(prim->v[0].p.w, prim->v[1].p.w, prim->v[2].p.w, bc);
pcz = 1.0/(pcz < 1e-5? 1e-5: pcz);
/* perspective-correct attribute interpolation */
- bc = modulapt3(bc, Vec3(prim.v[0].p.w*pcz,
- prim.v[1].p.w*pcz,
- prim.v[2].p.w*pcz));
- berpvertex(fsp.v, &prim.v[0], &prim.v[1], &prim.v[2], bc);
+ bc = modulapt3(bc, Vec3(prim->v[0].p.w*pcz,
+ prim->v[1].p.w*pcz,
+ prim->v[2].p.w*pcz));
+ berpvertex(fsp.v, &prim->v[0], &prim->v[1], &prim->v[2], bc);
fsp.p = p;
c = params->fshader(&fsp);
@@ -344,8 +342,8 @@
pushtoAbuf(params->fb, p, c, z);
else
pixel(cr, p, c, params->camera->enableblend);
- delvattrs(fsp.v);
}
+ delvattrs(fsp.v);
break;
default: sysfatal("alien primitive detected");
}
@@ -357,6 +355,7 @@
Rasterparam *rp;
Rastertask *task;
SUparams *params;
+ Renderjob *job;
uvlong t0;
int i;
@@ -368,22 +367,32 @@
t0 = nanosec();
params = task->params;
+ job = params->job;
+ if(job->times.Rn[rp->id].t0 == 0)
+ job->times.Rn[rp->id].t0 = t0;
+
/* end of job */
if(params->entity == nil){
- if(decref(params->job) < 1){
- if(params->job->camera->enableAbuff)
- squashAbuf(params->job->fb, params->job->camera->enableblend);
- params->job->times.Rn.t1 = nanosec();
- nbsend(params->job->donec, nil);
+ /*
+ * stamp t1 before giving up our reference: after the last
+ * decref donec is signalled and shootcamera goes on to print
+ * the timings and free job resources, so job must not be
+ * written past this point by the other rasterizers.
+ */
+ job->times.Rn[rp->id].t1 = nanosec();
+ if(decref(job) < 1){
+ if(job->camera->enableAbuff){
+ squashAbuf(job->fb, job->camera->enableblend);
+ /* we hold the last reference; include the squash */
+ job->times.Rn[rp->id].t1 = nanosec();
+ }
+ nbsend(job->donec, nil);
free(params);
}
free(task);
continue;
}
- if(params->job->times.Rn.t0 == 0)
- params->job->times.Rn.t0 = t0;
-
rasterize(task);
for(i = 0; i < task->p.type+1; i++)
@@ -423,11 +423,12 @@
while((params = recvp(tp->paramsc)) != nil){
t0 = nanosec();
- if(params->job->times.Tn.t0 == 0)
- params->job->times.Tn.t0 = t0;
+ if(params->job->times.Tn[tp->id].t0 == 0)
+ params->job->times.Tn[tp->id].t0 = t0;
/* end of job */
if(params->entity == nil){
+ params->job->times.Tn[tp->id].t1 = nanosec();
if(decref(params->job) < 1){
params->job->ref = nproc;
for(i = 0; i < nproc; i++){
@@ -436,7 +437,6 @@
task->params = params;
sendp(taskchans[i], task);
}
- params->job->times.Tn.t1 = nanosec();
}
continue;
}
@@ -476,7 +476,7 @@
bbox.max.y = p->v[0].p.y+1;
for(i = 0; i < nproc; i++)
- if(rectXrect(bbox,wr[i])){
+ if(rectXrect(bbox, wr[i])){
newparams = emalloc(sizeof *newparams);
*newparams = *params;
task = emalloc(sizeof *task);
@@ -485,6 +485,7 @@
task->p = *p;
task->p.v[0] = dupvertex(&p->v[0]);
sendp(taskchans[i], task);
+ break;
}
delvattrs(&p->v[0]);
break;
@@ -518,7 +519,7 @@
bbox.max.y = max(p->v[0].p.y, p->v[1].p.y)+1;
for(i = 0; i < nproc; i++)
- if(rectXrect(bbox,wr[i])){
+ if(rectXrect(bbox, wr[i])){
newparams = emalloc(sizeof *newparams);
*newparams = *params;
task = emalloc(sizeof *task);
@@ -570,7 +571,7 @@
bbox.max.y = max(max(p->v[0].p.y, p->v[1].p.y), p->v[2].p.y)+1;
for(i = 0; i < nproc; i++)
- if(rectXrect(bbox,wr[i])){
+ if(rectXrect(bbox, wr[i])){
newparams = emalloc(sizeof *newparams);
*newparams = *params;
task = emalloc(sizeof *task);
@@ -598,12 +599,12 @@
static void
entityproc(void *arg)
{
+ Entityparam *ep;
Channel *paramsin, **paramsout, **taskchans;
Tilerparam *tp;
Rasterparam *rp;
SUparams *params, *newparams;
Primitive *eb, *ee;
- char *nprocs;
ulong stride, nprims, nproc, nworkers;
int i;
uvlong t0;
@@ -610,13 +611,12 @@
threadsetname("entityproc");
- paramsin = arg;
- nprocs = getenv("NPROC");
- if(nprocs == nil || (nproc = strtoul(nprocs, nil, 10)) < 2)
- nproc = 1;
- else
+ ep = arg;
+ paramsin = ep->paramsc;
+
+ nproc = ep->rctl->nprocs;
+ if(nproc > 2)
nproc /= 2;
- free(nprocs);
paramsout = emalloc(nproc*sizeof(*paramsout));
taskchans = emalloc(nproc*sizeof(*taskchans));
@@ -641,6 +641,14 @@
if(params->job->times.E.t0 == 0)
params->job->times.E.t0 = t0;
+ /* prof: initialize timing slots for the next stages */
+// if(params->job->times.Tn == nil){
+// params->job->times.Tn = emalloc(nproc*sizeof(Rendertime));
+// params->job->times.Rn = emalloc(nproc*sizeof(Rendertime));
+// memset(params->job->times.Tn, 0, nproc*sizeof(Rendertime));
+// memset(params->job->times.Rn, 0, nproc*sizeof(Rendertime));
+// }
+
/* end of job */
if(params->entity == nil){
params->job->ref = nproc;
@@ -676,23 +684,25 @@
static void
renderer(void *arg)
{
- Channel *jobc;
+ Renderer *rctl;
Renderjob *job;
Scene *sc;
Entity *ent;
SUparams *params;
- Channel *paramsc;
+ Entityparam *ep;
uvlong time, lastid;
threadsetname("renderer");
- jobc = arg;
+ rctl = arg;
lastid = 0;
- paramsc = chancreate(sizeof(SUparams*), 8);
- proccreate(entityproc, paramsc, mainstacksize);
+ ep = emalloc(sizeof *ep);
+ ep->rctl = rctl;
+ ep->paramsc = chancreate(sizeof(SUparams*), 8);
+ proccreate(entityproc, ep, mainstacksize);
- while((job = recvp(jobc)) != nil){
+ while((job = recvp(rctl->jobq)) != nil){
time = nanosec();
job->times.R.t0 = time;
job->id = lastid++;
@@ -718,13 +728,13 @@
params->uni_time = time;
params->vshader = job->shaders->vshader;
params->fshader = job->shaders->fshader;
- sendp(paramsc, params);
+ sendp(ep->paramsc, params);
}
/* mark end of job */
params = emalloc(sizeof *params);
memset(params, 0, sizeof *params);
params->job = job;
- sendp(paramsc, params);
+ sendp(ep->paramsc, params);
job->times.R.t1 = nanosec();
}
@@ -734,9 +744,28 @@
initgraphics(void)
{
Renderer *r;
+ char *nprocs;
+ ulong nproc;
+ /*
+ * NPROC missing or below 2 selects a single proc.
+ * NOTE(review): Renderjob.times holds 20 slots per stage and is
+ * indexed by tiler/rasterizer id; presumably ids follow the worker
+ * count, so a very large NPROC could overrun them. Confirm.
+ */
+ nprocs = getenv("NPROC");
+ if(nprocs == nil || (nproc = strtoul(nprocs, nil, 10)) < 2)
+ nproc = 1;
+ free(nprocs);
+
r = emalloc(sizeof *r);
- r->c = chancreate(sizeof(Renderjob*), 8);
- proccreate(renderer, r->c, mainstacksize);
+ /*
+ * clear the struct so doprof starts out disabled; other emalloc
+ * call sites in this code memset too, so it presumably does not zero.
+ */
+ memset(r, 0, sizeof *r);
+ r->jobq = chancreate(sizeof(Renderjob*), 8);
+ r->nprocs = nproc;
+ proccreate(renderer, r, mainstacksize);
return r;
}
--- a/vertex.c
+++ b/vertex.c
@@ -59,8 +59,6 @@
v->c = lerp3(v0->c, v1->c, t);
v->uv = lerp2(v0->uv, v1->uv, t);
v->mtl = v0->mtl != nil? v0->mtl: v1->mtl;
- v->attrs = nil;
- v->nattrs = 0;
for(i = 0; i < v0->nattrs; i++){
va.id = v0->attrs[i].id;
va.type = v0->attrs[i].type;
@@ -86,8 +84,6 @@
v->c = berp3(v0->c, v1->c, v2->c, bc);
v->uv = berp2(v0->uv, v1->uv, v2->uv, bc);
v->mtl = v0->mtl != nil? v0->mtl: v1->mtl != nil? v1->mtl: v2->mtl;
- v->attrs = nil;
- v->nattrs = 0;
for(i = 0; i < v0->nattrs; i++){
va.id = v0->attrs[i].id;
va.type = v0->attrs[i].type;