shithub: libgraphics

Download patch

ref: fb2c8083f028676d0c46e0d9a89de78b3f129552
parent: d36e762e12d108fec8cd19071b22909b4466812a
author: rodri <rgl@antares-labs.eu>
date: Sat Sep 7 14:39:36 EDT 2024

remove unnecessary copying. profile individual stage procs.

--- a/camera.c
+++ b/camera.c
@@ -87,17 +87,6 @@
 }
 
 static void
-updatetimes(Camera *c, Renderjob *j)
-{
-	c->times.R[c->times.cur] = j->times.R;
-	c->times.E[c->times.cur] = j->times.E;
-	c->times.Tn[c->times.cur] = j->times.Tn;
-	c->times.Rn[c->times.cur] = j->times.Rn;
-	c->times.last = c->times.cur;
-	c->times.cur = ++c->times.cur % nelem(c->times.R);
-}
-
-static void
 verifycfg(Camera *c)
 {
 	assert(c->view != nil);
@@ -208,6 +197,39 @@
 	c->by = crossvec3(c->bz, c->bx);
 }
 
+/*
+ * Print the per-stage timestamps recorded in job to fd 2.
+ * Does nothing unless the job's renderer has doprof set.
+ */
+static void
+printtimings(Renderjob *job)
+{
+	ulong nworkers;
+	int i;
+
+	if(!job->rctl->doprof)
+		return;
+
+	/* tilers and rasterizers: same count entityproc derives from nprocs */
+	nworkers = job->rctl->nprocs;
+	if(nworkers > 2)
+		nworkers /= 2;
+	/* stay within the fixed-size timing tables */
+	if(nworkers > nelem(job->times.Tn))
+		nworkers = nelem(job->times.Tn);
+
+	fprint(2, "R %llud %llud\nE %llud %llud\n",
+		job->times.R.t0, job->times.R.t1,
+		job->times.E.t0, job->times.E.t1);
+	for(i = 0; i < nworkers; i++)
+		fprint(2, "T%d %llud %llud\n", i,
+			job->times.Tn[i].t0, job->times.Tn[i].t1);
+	for(i = 0; i < nworkers; i++)
+		fprint(2, "r%d %llud %llud\n", i,
+			job->times.Rn[i].t0, job->times.Rn[i].t1);
+	fprint(2, "\n");
+}
+
 void
 shootcamera(Camera *c, Shadertab *s)
 {
@@ -224,6 +233,7 @@
 
 	job = emalloc(sizeof *job);
 	memset(job, 0, sizeof *job);
+	job->rctl = c->rctl;
 	job->fb = fbctl->getbb(fbctl);
 	job->camera = emalloc(sizeof *c);
 	*job->camera = *c;
@@ -233,7 +243,7 @@
 
 	fbctl->reset(fbctl, c->clearcolor);
 	t0 = nanosec();
-	sendp(c->rctl->c, job);
+	sendp(c->rctl->jobq, job);
 	recvp(job->donec);
 	delscene(job->scene);			/* destroy the snapshot */
 	/*
@@ -251,7 +261,7 @@
 		reloadcamera(job->camera);
 		job->scene = dupscene(skyboxscene);
 		job->shaders = &skyboxshader;
-		sendp(c->rctl->c, job);
+		sendp(c->rctl->jobq, job);
 		recvp(job->donec);
 		delscene(job->scene);
 	}
@@ -259,7 +269,9 @@
 	fbctl->swap(fbctl);
 
 	updatestats(c, t1-t0);
-	updatetimes(c, job);
+	printtimings(job);
+//	free(job->times.Tn);
+//	free(job->times.Rn);
 
 	chanfree(job->donec);
 	free(job->camera);
--- a/clip.c
+++ b/clip.c
@@ -108,6 +108,7 @@
 			d1 = (j&1) == 0? sd1[j]: -sd1[j];
 			perc = d0/(d0 - d1);
 
+			memset(&v, 0, sizeof v);
 			lerpvertex(&v, v0, v1, perc);
 			addvert(Vout, v);
 
@@ -191,6 +192,8 @@
 	Point3 dp;
 	Point Δp;
 	double len, perc;
+
+	memset(v, 0, sizeof v);
 
 	dp = subpt3(v1->p, v0->p);
 	len = hypot(dp.x, dp.y);
--- a/graphics.h
+++ b/graphics.h
@@ -56,8 +56,8 @@
 typedef struct Shaderparams Shaderparams;
 typedef struct SUparams SUparams;
 typedef struct Shadertab Shadertab;
-typedef struct Renderer Renderer;
 typedef struct Rendertime Rendertime;
+typedef struct Renderer Renderer;
 typedef struct Renderjob Renderjob;
 typedef struct Fragment Fragment;
 typedef struct Astk Astk;
@@ -231,14 +231,16 @@
 	Color (*fshader)(Shaderparams*);	/* fragment shader */
 };
 
-struct Renderer
+struct Rendertime
 {
-	Channel *c;
+	uvlong t0, t1;	/* stage start/end stamps, taken with nanosec() */
 };
 
-struct Rendertime
+struct Renderer
 {
-	uvlong t0, t1;
+	Channel *jobq;	/* Renderjob* queue drained by the renderer proc */
+	ulong nprocs;	/* from $NPROC in initgraphics; 1 if unset or < 2 */
+	int doprof;	/* enable profiling */
 };
 
 struct Renderjob
@@ -245,6 +247,7 @@
 {
 	Ref;
 	uvlong id;
+	Renderer *rctl;
 	Framebuf *fb;
 	Camera *camera;
 	Scene *scene;
@@ -252,7 +255,8 @@
 	Channel *donec;
 
 	struct {
-		Rendertime R, E, Tn, Rn;	/* renderer, entityproc, tilers, rasterizers */
+		/* renderer, entityproc, tilers, rasterizers */
+		Rendertime R, E, Tn[20], Rn[20];
 	} times;
 
 	Renderjob *next;
@@ -356,10 +360,6 @@
 		uvlong min, avg, max, acc, n, v;
 		uvlong nframes;
 	} stats;
-	struct {
-		Rendertime R[10], E[10], Tn[10], Rn[10];
-		int last, cur;
-	} times;
 };
 
 /* camera */
--- a/internal.h
+++ b/internal.h
@@ -1,4 +1,5 @@
 typedef struct Polygon Polygon;
+typedef struct Entityparam Entityparam;
 typedef struct Tilerparam Tilerparam;
 typedef struct Rasterparam Rasterparam;
 typedef struct Rastertask Rastertask;
@@ -8,6 +9,12 @@
 	Vertex *v;
 	ulong n;
 	ulong cap;
+};
+
+struct Entityparam
+{
+	Renderer *rctl;	/* renderer that started the entityproc */
+	Channel *paramsc;	/* SUparams* stream sent by the renderer proc */
 };
 
 struct Tilerparam
--- a/render.c
+++ b/render.c
@@ -191,7 +191,7 @@
 {
 	SUparams *params;
 	Raster *cr, *zr;
-	Primitive prim;
+	Primitive *prim;
 	Vertex v;
 	Shaderparams fsp;
 	Triangle2 t;
@@ -204,8 +204,9 @@
 	int steep = 0, Δe, e, Δy;
 
 	params = task->params;
-	prim = task->p;
+	prim = &task->p;
 	memset(&fsp, 0, sizeof fsp);
+	memset(&v, 0, sizeof v);
 	fsp.su = params;
 	fsp.v = &v;
 	fsp.getuniform = sparams_getuniform;
@@ -216,11 +217,11 @@
 	cr = params->fb->rasters;
 	zr = cr->next;
 
-	switch(prim.type){
+	switch(prim->type){
 	case PPoint:
-		p = Pt(prim.v[0].p.x, prim.v[0].p.y);
+		p = Pt(prim->v[0].p.x, prim->v[0].p.y);
 
-		z = fclamp(prim.v[0].p.z, 0, 1);
+		z = fclamp(prim->v[0].p.z, 0, 1);
 		if(params->camera->enabledepth){
 			if(z <= getdepth(zr, p))
 				break;
@@ -227,7 +228,7 @@
 			putdepth(zr, p, z);
 		}
 
-		*fsp.v = dupvertex(&prim.v[0]);
+		fsp.v = &prim->v[0];
 		fsp.p = p;
 		c = params->fshader(&fsp);
 		if(params->camera->enableAbuff)
@@ -237,10 +238,10 @@
 		delvattrs(fsp.v);
 		break;
 	case PLine:
-		p0 = Pt(prim.v[0].p.x, prim.v[0].p.y);
-		p1 = Pt(prim.v[1].p.x, prim.v[1].p.y);
+		p0 = Pt(prim->v[0].p.x, prim->v[0].p.y);
+		p1 = Pt(prim->v[1].p.x, prim->v[1].p.y);
 		/* clip it against our wr */
-		if(rectclipline(task->wr, &p0, &p1, &prim.v[0], &prim.v[1]) < 0)
+		if(rectclipline(task->wr, &p0, &p1, &prim->v[0], &prim->v[1]) < 0)
 			break;
 
 		/* transpose the points */
@@ -253,7 +254,7 @@
 		/* make them left-to-right */
 		if(p0.x > p1.x){
 			SWAP(Point, &p0, &p1);
-			SWAP(Vertex, &prim.v[0], &prim.v[1]);
+			SWAP(Vertex, &prim->v[0], &prim->v[1]);
 		}
 
 		dp = subpt(p1, p0);
@@ -268,7 +269,7 @@
 
 			if(steep) SWAP(int, &p.x, &p.y);
 
-			z = flerp(prim.v[0].p.z, prim.v[1].p.z, perc);
+			z = flerp(prim->v[0].p.z, prim->v[1].p.z, perc);
 			/* TODO get rid of the bounds check and make sure the clipping doesn't overflow */
 			if(params->camera->enabledepth){
 				if(!ptinrect(p, params->fb->r) || z <= getdepth(zr, p))
@@ -277,12 +278,12 @@
 			}
 
 			/* interpolate z⁻¹ and get actual z */
-			pcz = flerp(prim.v[0].p.w, prim.v[1].p.w, perc);
+			pcz = flerp(prim->v[0].p.w, prim->v[1].p.w, perc);
 			pcz = 1.0/(pcz < 1e-5? 1e-5: pcz);
 
 			/* perspective-correct attribute interpolation  */
-			perc *= prim.v[0].p.w * pcz;
-			lerpvertex(fsp.v, &prim.v[0], &prim.v[1], perc);
+			perc *= prim->v[0].p.w * pcz;
+			lerpvertex(fsp.v, &prim->v[0], &prim->v[1], perc);
 
 			fsp.p = p;
 			c = params->fshader(&fsp);
@@ -290,7 +291,6 @@
 				pushtoAbuf(params->fb, p, c, z);
 			else
 				pixel(cr, p, c, params->camera->enableblend);
-			delvattrs(fsp.v);
 discard:
 			if(steep) SWAP(int, &p.x, &p.y);
 
@@ -300,20 +300,18 @@
 				e -= 2*dp.x;
 			}
 		}
+		delvattrs(fsp.v);
 		break;
 	case PTriangle:
-		t.p0 = Pt2(prim.v[0].p.x, prim.v[0].p.y, 1);
-		t.p1 = Pt2(prim.v[1].p.x, prim.v[1].p.y, 1);
-		t.p2 = Pt2(prim.v[2].p.x, prim.v[2].p.y, 1);
+		t.p0 = Pt2(prim->v[0].p.x, prim->v[0].p.y, 1);
+		t.p1 = Pt2(prim->v[1].p.x, prim->v[1].p.y, 1);
+		t.p2 = Pt2(prim->v[2].p.x, prim->v[2].p.y, 1);
 		/* find the triangle's bbox and clip it against our wr */
 		bbox.min.x = min(min(t.p0.x, t.p1.x), t.p2.x);
 		bbox.min.y = min(min(t.p0.y, t.p1.y), t.p2.y);
 		bbox.max.x = max(max(t.p0.x, t.p1.x), t.p2.x)+1;
 		bbox.max.y = max(max(t.p0.y, t.p1.y), t.p2.y)+1;
-		bbox.min.x = max(bbox.min.x, task->wr.min.x);
-		bbox.min.y = max(bbox.min.y, task->wr.min.y);
-		bbox.max.x = min(bbox.max.x, task->wr.max.x);
-		bbox.max.y = min(bbox.max.y, task->wr.max.y);
+		rectclip(&bbox, task->wr);
 
 		for(p.y = bbox.min.y; p.y < bbox.max.y; p.y++)
 		for(p.x = bbox.min.x; p.x < bbox.max.x; p.x++){
@@ -321,7 +319,7 @@
 			if(bc.x < 0 || bc.y < 0 || bc.z < 0)
 				continue;
 
-			z = fberp(prim.v[0].p.z, prim.v[1].p.z, prim.v[2].p.z, bc);
+			z = fberp(prim->v[0].p.z, prim->v[1].p.z, prim->v[2].p.z, bc);
 			if(params->camera->enabledepth){
 				if(z <= getdepth(zr, p))
 					continue;
@@ -329,14 +327,14 @@
 			}
 
 			/* interpolate z⁻¹ and get actual z */
-			pcz = fberp(prim.v[0].p.w, prim.v[1].p.w, prim.v[2].p.w, bc);
+			pcz = fberp(prim->v[0].p.w, prim->v[1].p.w, prim->v[2].p.w, bc);
 			pcz = 1.0/(pcz < 1e-5? 1e-5: pcz);
 
 			/* perspective-correct attribute interpolation  */
-			bc = modulapt3(bc, Vec3(prim.v[0].p.w*pcz,
-						prim.v[1].p.w*pcz,
-						prim.v[2].p.w*pcz));
-			berpvertex(fsp.v, &prim.v[0], &prim.v[1], &prim.v[2], bc);
+			bc = modulapt3(bc, Vec3(prim->v[0].p.w*pcz,
+						prim->v[1].p.w*pcz,
+						prim->v[2].p.w*pcz));
+			berpvertex(fsp.v, &prim->v[0], &prim->v[1], &prim->v[2], bc);
 
 			fsp.p = p;
 			c = params->fshader(&fsp);
@@ -344,8 +342,8 @@
 				pushtoAbuf(params->fb, p, c, z);
 			else
 				pixel(cr, p, c, params->camera->enableblend);
-			delvattrs(fsp.v);
 		}
+		delvattrs(fsp.v);
 		break;
 	default: sysfatal("alien primitive detected");
 	}
@@ -357,6 +355,7 @@
 	Rasterparam *rp;
 	Rastertask *task;
 	SUparams *params;
+	Renderjob *job;
 	uvlong t0;
 	int i;
 
@@ -368,22 +367,23 @@
 		t0 = nanosec();
 
 		params = task->params;
+		job = params->job;
+		if(job->times.Rn[rp->id].t0 == 0)
+			job->times.Rn[rp->id].t0 = t0;
+
 		/* end of job */
 		if(params->entity == nil){
-			if(decref(params->job) < 1){
-				if(params->job->camera->enableAbuff)
-					squashAbuf(params->job->fb, params->job->camera->enableblend);
-				params->job->times.Rn.t1 = nanosec();
-				nbsend(params->job->donec, nil);
+			job->times.Rn[rp->id].t1 = nanosec();	/* stamp before decref: once donec fires shootcamera reads the timings and tears the job down */
+			if(decref(job) < 1){
+				if(job->camera->enableAbuff)
+					squashAbuf(job->fb, job->camera->enableblend);
+				nbsend(job->donec, nil);
 				free(params);
 			}
 			free(task);
 			continue;
 		}
 
-		if(params->job->times.Rn.t0 == 0)
-			params->job->times.Rn.t0 = t0;
-
 		rasterize(task);
 
 		for(i = 0; i < task->p.type+1; i++)
@@ -423,11 +423,12 @@
 
 	while((params = recvp(tp->paramsc)) != nil){
 		t0 = nanosec();
-		if(params->job->times.Tn.t0 == 0)
-			params->job->times.Tn.t0 = t0;
+		if(params->job->times.Tn[tp->id].t0 == 0)
+			params->job->times.Tn[tp->id].t0 = t0;
 
 		/* end of job */
 		if(params->entity == nil){
+			params->job->times.Tn[tp->id].t1 = nanosec();
 			if(decref(params->job) < 1){
 				params->job->ref = nproc;
 				for(i = 0; i < nproc; i++){
@@ -436,7 +437,6 @@
 					task->params = params;
 					sendp(taskchans[i], task);
 				}
-				params->job->times.Tn.t1 = nanosec();
 			}
 			continue;
 		}
@@ -476,7 +476,7 @@
 				bbox.max.y = p->v[0].p.y+1;
 
 				for(i = 0; i < nproc; i++)
-					if(rectXrect(bbox,wr[i])){
+					if(rectXrect(bbox, wr[i])){
 						newparams = emalloc(sizeof *newparams);
 						*newparams = *params;
 						task = emalloc(sizeof *task);
@@ -485,6 +485,7 @@
 						task->p = *p;
 						task->p.v[0] = dupvertex(&p->v[0]);
 						sendp(taskchans[i], task);
+						break;
 					}
 				delvattrs(&p->v[0]);
 				break;
@@ -518,7 +519,7 @@
 				bbox.max.y = max(p->v[0].p.y, p->v[1].p.y)+1;
 
 				for(i = 0; i < nproc; i++)
-					if(rectXrect(bbox,wr[i])){
+					if(rectXrect(bbox, wr[i])){
 						newparams = emalloc(sizeof *newparams);
 						*newparams = *params;
 						task = emalloc(sizeof *task);
@@ -570,7 +571,7 @@
 					bbox.max.y = max(max(p->v[0].p.y, p->v[1].p.y), p->v[2].p.y)+1;
 
 					for(i = 0; i < nproc; i++)
-						if(rectXrect(bbox,wr[i])){
+						if(rectXrect(bbox, wr[i])){
 							newparams = emalloc(sizeof *newparams);
 							*newparams = *params;
 							task = emalloc(sizeof *task);
@@ -598,12 +599,12 @@
 static void
 entityproc(void *arg)
 {
+	Entityparam *ep;
 	Channel *paramsin, **paramsout, **taskchans;
 	Tilerparam *tp;
 	Rasterparam *rp;
 	SUparams *params, *newparams;
 	Primitive *eb, *ee;
-	char *nprocs;
 	ulong stride, nprims, nproc, nworkers;
 	int i;
 	uvlong t0;
@@ -610,13 +611,12 @@
 
 	threadsetname("entityproc");
 
-	paramsin = arg;
-	nprocs = getenv("NPROC");
-	if(nprocs == nil || (nproc = strtoul(nprocs, nil, 10)) < 2)
-		nproc = 1;
-	else
+	ep = arg;
+	paramsin = ep->paramsc;
+
+	nproc = ep->rctl->nprocs;
+	if(nproc > 2)
 		nproc /= 2;
-	free(nprocs);
 
 	paramsout = emalloc(nproc*sizeof(*paramsout));
 	taskchans = emalloc(nproc*sizeof(*taskchans));
@@ -641,6 +641,14 @@
 		if(params->job->times.E.t0 == 0)
 			params->job->times.E.t0 = t0;
 
+		/* prof: initialize timing slots for the next stages */
+//		if(params->job->times.Tn == nil){
+//			params->job->times.Tn = emalloc(nproc*sizeof(Rendertime));
+//			params->job->times.Rn = emalloc(nproc*sizeof(Rendertime));
+//			memset(params->job->times.Tn, 0, nproc*sizeof(Rendertime));
+//			memset(params->job->times.Rn, 0, nproc*sizeof(Rendertime));
+//		}
+
 		/* end of job */
 		if(params->entity == nil){
 			params->job->ref = nproc;
@@ -676,23 +684,25 @@
 static void
 renderer(void *arg)
 {
-	Channel *jobc;
+	Renderer *rctl;
 	Renderjob *job;
 	Scene *sc;
 	Entity *ent;
 	SUparams *params;
-	Channel *paramsc;
+	Entityparam *ep;
 	uvlong time, lastid;
 
 	threadsetname("renderer");
 
-	jobc = arg;
+	rctl = arg;
 	lastid = 0;
-	paramsc = chancreate(sizeof(SUparams*), 8);
 
-	proccreate(entityproc, paramsc, mainstacksize);
+	ep = emalloc(sizeof *ep);
+	ep->rctl = rctl;
+	ep->paramsc = chancreate(sizeof(SUparams*), 8);
+	proccreate(entityproc, ep, mainstacksize);
 
-	while((job = recvp(jobc)) != nil){
+	while((job = recvp(rctl->jobq)) != nil){
 		time = nanosec();
 		job->times.R.t0 = time;
 		job->id = lastid++;
@@ -718,13 +728,13 @@
 			params->uni_time = time;
 			params->vshader = job->shaders->vshader;
 			params->fshader = job->shaders->fshader;
-			sendp(paramsc, params);
+			sendp(ep->paramsc, params);
 		}
 		/* mark end of job */
 		params = emalloc(sizeof *params);
 		memset(params, 0, sizeof *params);
 		params->job = job;
-		sendp(paramsc, params);
+		sendp(ep->paramsc, params);
 
 		job->times.R.t1 = nanosec();
 	}
@@ -734,9 +744,19 @@
 initgraphics(void)
 {
 	Renderer *r;
+	char *nprocs;
+	ulong nproc;
 
+	/* worker budget comes from $NPROC; unset or < 2 means one */
+	nprocs = getenv("NPROC");
+	if(nprocs == nil || (nproc = strtoul(nprocs, nil, 10)) < 2)
+		nproc = 1;
+	free(nprocs);
+
 	r = emalloc(sizeof *r);
-	r->c = chancreate(sizeof(Renderjob*), 8);
-	proccreate(renderer, r->c, mainstacksize);
+	memset(r, 0, sizeof *r);	/* doprof must start out disabled, not left unset */
+	r->jobq = chancreate(sizeof(Renderjob*), 8);
+	r->nprocs = nproc;
+	proccreate(renderer, r, mainstacksize);
 	return r;
 }
--- a/vertex.c
+++ b/vertex.c
@@ -59,8 +59,6 @@
 	v->c = lerp3(v0->c, v1->c, t);
 	v->uv = lerp2(v0->uv, v1->uv, t);
 	v->mtl = v0->mtl != nil? v0->mtl: v1->mtl;
-	v->attrs = nil;
-	v->nattrs = 0;
 	for(i = 0; i < v0->nattrs; i++){
 		va.id = v0->attrs[i].id;
 		va.type = v0->attrs[i].type;
@@ -86,8 +84,6 @@
 	v->c = berp3(v0->c, v1->c, v2->c, bc);
 	v->uv = berp2(v0->uv, v1->uv, v2->uv, bc);
 	v->mtl = v0->mtl != nil? v0->mtl: v1->mtl != nil? v1->mtl: v2->mtl;
-	v->attrs = nil;
-	v->nattrs = 0;
 	for(i = 0; i < v0->nattrs; i++){
 		va.id = v0->attrs[i].id;
 		va.type = v0->attrs[i].type;