shithub: tinygl

Download patch

ref: 63ec41d0a6f4ed434f0ff3f2d68cdd008b45eb34
parent: 7536fd01dbe592cdd105f3f1bfbc4e21987d8d67
author: David <gek@katherine>
date: Wed Mar 10 07:12:04 EST 2021

Automatic commit.

--- a/SDL_Examples/texture.c
+++ b/SDL_Examples/texture.c
@@ -61,6 +61,7 @@
 }
 
 void draw() {
+	glClearColor(0.0, 0.0, 0.0, 0.0);
 	glEnable(GL_TEXTURE_2D);
 	glBindTexture(GL_TEXTURE_2D, tex);
 	glBegin(GL_TRIANGLES);
@@ -256,7 +257,7 @@
 		glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
 		draw();
 		glDrawText((unsigned char*)"\nBlitting text\nto the screen!", 0, 0, 0x000000FF);
-		glPixelZoom(0.5,1);
+		glPixelZoom(2.0,0.7);
 		glRasterPos3f(-1,-1,fabs(sinf(frames_notreset/200.0)));
 		{
 			GLint xsize, ysize;
--- a/config.mk
+++ b/config.mk
@@ -4,7 +4,7 @@
 CC= gcc
 #CFLAGS= -Wall -w -O3 -g -std=c99 -mtune=native -DNDEBUG
 #CFLAGS= -Wall -w -O3 -g -std=c99 -march=native -DNDEBUG
-CFLAGS= -Wall -O3 -std=c99 -DNDEBUG -g
+CFLAGS= -Wall -O3 -std=c99 -DNDEBUG -g -fopenmp -Wno-uninitialized
 #CFLAGS= -Wall -O1 -g -std=c99 -Wno-undef -DNDEBUG
 LFLAGS=
 
--- a/include/zbuffer.h
+++ b/include/zbuffer.h
@@ -71,11 +71,7 @@
 //#define TGL_CLAMPI(imp) ( (imp>0) * (COLOR_MASK * (imp>COLOR_MASK) + imp * (!(imp>COLOR_MASK)) )      )
 #define TGL_CLAMPI(imp) ( (imp>0)?((imp>COLOR_MASK)?COLOR_MASK:imp):0   )
 
-//#if TGL_FEATURE_BETTER_COLOR_INTERP == 1
-//#define (imp) imp = TGL_CLAMPI2((imp));
-//#else
-//#define (imp) /*a comment*/
-//#endif
+
 
 
 #if TGL_FEATURE_RENDER_BITS == 32
--- a/src/zline.c
+++ b/src/zline.c
@@ -7,8 +7,8 @@
 /* TODO: Implement blending for lines and points. */
 
 void ZB_plot(ZBuffer* zb, ZBufferPoint* p) {
-	GLushort* pz;
-	PIXEL* pp;
+	
+	
 	GLint zz;
 	GLubyte zbdw = zb->depth_write; 
 	GLubyte zbdt = zb->depth_test;
@@ -17,14 +17,20 @@
 	zz = p->z >> ZB_POINT_Z_FRAC_BITS;
 	//	PIXEL col;
 	if(zbps == 1){
+		GLushort* pz;
+		PIXEL* pp;
 		pz = zb->zbuf + (p->y * zb->xsize + p->x);
 		pp = (PIXEL*)((GLbyte*)zb->pbuf + zb->linesize * p->y + p->x * PSZB);
 		
 		if (ZCMP(zz, *pz)) {
+#if TGL_FEATURE_BLEND == 1
 			if(!zb->enable_blend)
 				*pp = RGB_TO_PIXEL(p->r, p->g, p->b);
 			else
 				TGL_BLEND_FUNC_RGB(p->r, p->g, p->b, (*pp))
+#else
+			*pp = RGB_TO_PIXEL(p->r, p->g, p->b);
+#endif
 			if(zbdw)
 				*pz = zz;
 		}
@@ -40,14 +46,18 @@
 		for(GLint y = by; y < ey; y++)
 		for(GLint x = bx; x < ex; x++)
 		{
-			pz = zb->zbuf + (y * zb->xsize + x);
-			pp = (PIXEL*)((GLbyte*)zb->pbuf + zb->linesize * y + x * PSZB);
+			GLushort* pz = zb->zbuf + (y * zb->xsize + x);
+			PIXEL* pp = (PIXEL*)((GLbyte*)zb->pbuf + zb->linesize * y + x * PSZB);
 			//zz = p->z >> ZB_POINT_Z_FRAC_BITS;
 			if (ZCMP(zz, *pz)) {
+#if TGL_FEATURE_BLEND == 1
 				if(!zb->enable_blend)
 					*pp = col;
 				else
 					TGL_BLEND_FUNC_RGB(p->r, p->g, p->b, (*pp))
+#else
+				*pp = col;
+#endif
 				if(zbdw)
 					*pz = zz;
 			}
--- a/src/zraster.c
+++ b/src/zraster.c
@@ -117,7 +117,7 @@
 	PIXEL* d = p[3].p;
 	PIXEL* pbuf = zb->pbuf;
 	GLushort* zbuf = zb->zbuf;
-	GLushort* pz;
+	
 	GLubyte zbdw = zb->depth_write; 
 	GLubyte zbdt = zb->depth_test;
 	GLint tw = zb->xsize;
@@ -124,9 +124,8 @@
 	GLint th = zb->ysize;
 	GLfloat pzoomx = c->pzoomx;
 	GLfloat pzoomy = c->pzoomy;
-	V4 rastoffset;
-	rastoffset.v[0] = rastpos.v[0];
-	rastoffset.v[1] = rastpos.v[1];
+	
+
 	GLint zz = c->rasterpos_zz;
 #if TGL_FEATURE_BLEND_DRAW_PIXELS == 1
 	TGL_BLEND_VARS
@@ -150,10 +149,13 @@
 		);
 		return;
 	}
-	for(GLint sx = 0; sx < w; sx++)
+// Works.
+#pragma omp parallel for
 	for(GLint sy = 0; sy < h; sy++)
+	for(GLint sx = 0; sx < w; sx++)
 	{
 		PIXEL col = d[sy*w+sx];
+		V4 rastoffset;
 		rastoffset.v[0] = rastpos.v[0] +  (GLfloat)sx * pzoomx;
 		rastoffset.v[1] = rastpos.v[1] - ((GLfloat)(h-sy) * pzoomy);
 		rastoffset.v[2] = rastoffset.v[0] + pzoomx;
@@ -161,7 +163,7 @@
 		for(GLint tx = rastoffset.v[0]; (GLfloat)tx < rastoffset.v[2];tx++)
 		for(GLint ty = rastoffset.v[1]; (GLfloat)ty > rastoffset.v[3];ty--)
 			if(CLIPTEST(tx,ty,tw,th)){
-			pz = zbuf + (ty * tw + tx);
+			GLushort* pz = zbuf + (ty * tw + tx);
 
 				if(ZCMP(zz,*pz)){
 
--- a/src/ztriangle.c
+++ b/src/ztriangle.c
@@ -300,10 +300,10 @@
 		register GLuint s, t, z;                                                                                                                   		           \
 		register GLint n;                                                                                                                          	               \
 		OR1OG1OB1DECL                                                                                                                                              \
-		GLfloat sz, tz, fz, zinv;                                                                                                                                  \
+		GLfloat sz, tz, fzl, zinv;                                                                                                                                  \
 		n = (x2 >> 16) - x1;                                                                                                                                       \
-		fz = (GLfloat)z1;                                                                                                                                          \
-		zinv = 1.0 / fz;                                                                                                                                           \
+		fzl = (GLfloat)z1;                                                                                                                                          \
+		zinv = 1.0 / fzl;                                                                                                                                           \
 		pp = (PIXEL*)((GLbyte*)pp1 + x1 * PSZB);                                                                                                                   \
 		pz = pz1 + x1;                                                                                                                                             \
 		z = z1;                                                                                                                                                    \
@@ -310,7 +310,7 @@
 		sz = sz1;                                                                                                                                                  \
 		tz = tz1;                                                                                                                                                  \
 		while (n >= (NB_INTERP - 1)) {                                                                                                                             \
-			register GLint  dsdx, dtdx;																															   \
+			register GLint dsdx, dtdx;																															   \
 			{                                                                                                                                                      \
 				GLfloat ss, tt;                                                                                                                                    \
 				ss = (sz * zinv);                                                                                                                                  \
@@ -320,8 +320,8 @@
 				dsdx = (GLint)((dszdx - ss * fdzdx) * zinv);                                                                                                       \
 				dtdx = (GLint)((dtzdx - tt * fdzdx) * zinv);																								       \
 			}                                                                                                   										           \
-				fz += fndzdx;                                                                                                                                      \
-				zinv = 1.0 / fz;                                                                                                                                   \
+				fzl += fndzdx;                                                                                                                                      \
+				zinv = 1.0 / fzl;                                                                                                                                   \
 			PUT_PIXEL(0); /*the_x++;*/                                                                                                                             \
 			PUT_PIXEL(1); /*the_x++;*/                                                                                                                             \
 			PUT_PIXEL(2); /*the_x++;*/                                                                                                                             \
--- a/src/ztriangle.h
+++ b/src/ztriangle.h
@@ -17,53 +17,40 @@
  5) Fixed point math is used for the depth "z" buffer
  6) We're not just using floats for everything because this is still supposed to be fast on platforms without SSE2
  7) Fewer variables is usually better
- 8) All variables that are used inside the rasterizer loop must exist irrespective of feature level at the language level,
- because OpenMP needs to be able to declare them "private". The compiler will of course optimize out the unused ones.
  */
 
-#define BEGIN_SEGMENT
-
 {
+	GLfloat fdx1, fdx2, fdy1, fdy2;
+	GLushort* pz1;
+	PIXEL* pp1;
 	
-	GLfloat fdx1, fdx2, fdy1, fdy2, fz;
+
 	GLint dx1, dy1, dx2, dy2;
+#if TGL_FEATURE_POLYGON_STIPPLE == 1
+	GLushort the_y;
+#endif
+	GLint error, derror;
 	GLint x1, dxdy_min, dxdy_max;
 	/* warning: x2 is multiplied by 2^16 */
 	GLint x2, dx2dy2;
 
-//#ifdef INTERP_Z
+#ifdef INTERP_Z
 	GLint z1, dzdx, dzdy, dzdl_min, dzdl_max;
-//#else
-//	static const GLint z1, dzdx, dzdy, dzdl_min, dzdl_max;
-//#endif
-//#ifdef INTERP_RGB
+#endif
+#ifdef INTERP_RGB
 	GLint r1, drdx, drdy, drdl_min, drdl_max;
 	GLint g1, dgdx, dgdy, dgdl_min, dgdl_max;
 	GLint b1, dbdx, dbdy, dbdl_min, dbdl_max;
-	//r1, drdx, drdy, drdl_min, drdl_max, g1, dgdx, dgdy, dgdl_min, dgdl_max, b1, dbdx, dbdy, dbdl_min, dbdl_max
-//#else
-//	static const GLint r1, drdx, drdy, drdl_min, drdl_max;
-//	static const GLint g1, dgdx, dgdy, dgdl_min, dgdl_max;
-//	static const GLint b1, dbdx, dbdy, dbdl_min, dbdl_max;
-//#endif
-//#ifdef INTERP_ST
+#endif
+#ifdef INTERP_ST
 	GLint s1, dsdx, dsdy, dsdl_min, dsdl_max;
 	GLint t1, dtdx, dtdy, dtdl_min, dtdl_max;
-	//s1, dsdx, dsdy, dsdl_min, dsdl_max, t1, dtdx, dtdy, dtdl_min, dtdl_max
-//#else
-//	static const GLint s1, dsdx, dsdy, dsdl_min, dsdl_max;
-//	static const GLint t1, dtdx, dtdy, dtdl_min, dtdl_max;
-//#endif
-//#ifdef INTERP_STZ
+#endif
+#ifdef INTERP_STZ
 	GLfloat sz1, dszdx, dszdy, dszdl_min, dszdl_max;
 	GLfloat tz1, dtzdx, dtzdy, dtzdl_min, dtzdl_max;
 	GLfloat fdzdx, fndzdx, ndszdx, ndtzdx;
-	//sz1, dszdx, dszdy, dszdl_min, dszdl_max, tz1, dtzdx, dtzdy, dtzdl_min, dtzdl_max
-//#else
-//	static const GLfloat sz1, dszdx, dszdy, dszdl_min, dszdl_max;
-//	static const GLfloat tz1, dtzdx, dtzdy, dtzdl_min, dtzdl_max;
-//	static const GLfloat fdzdx, fndzdx, ndszdx, ndtzdx;
-//#endif
+#endif
 
 	/* we sort the vertex with increasing y */
 	if (p1->y < p0->y) {
@@ -90,7 +77,7 @@
 	fdx2 = p2->x - p0->x;
 	fdy2 = p2->y - p0->y;
 	
-	fz = fdx1 * fdy2 - fdx2 * fdy1;//fz first usage
+	GLfloat fz = fdx1 * fdy2 - fdx2 * fdy1;//fz first usage
 	if (fz == 0)
 		return;
 	fz = 1.0 / fz; //value of fz is used (VALUE_FZ_USED)
@@ -176,56 +163,42 @@
 } //EOF d1, d2 lifetimes.
 	/* screen coordinates */
 
-/*
 	pp1 = (PIXEL*)(zb->pbuf) + zb->xsize * p0->y; //pp1 first usage
 #if TGL_FEATURE_POLYGON_STIPPLE == 1
 	the_y = p0->y;
 #endif
 	pz1 = zb->zbuf + p0->y * zb->xsize;
-*/
+
 	DRAW_INIT();
+//part used here and down.
+//TODO: #pragma omp parallel for private(a, b, c)
 //Required reading:
 //http://jakascorner.com/blog/2016/06/omp-data-sharing-attributes.html
 //I'd also like to figure out if the main while() loop over raster lines can be OMP parallelized, but I suspect it isn't worth it.
-#pragma omp parallel for private(x1, dxdy_min, dxdy_max, x2, dx2dy2, z1, dzdx, dzdy, dzdl_min, dzdl_max, r1, drdx, drdy, drdl_min, drdl_max, g1, dgdx, dgdy, dgdl_min, dgdl_max, b1, dbdx, dbdy, dbdl_min, dbdl_max, s1, dsdx, dsdy, dsdl_min, dsdl_max, t1, dtdx, dtdy, dtdl_min, dtdl_max, sz1, dszdx, dszdy, dszdl_min, dszdl_max, tz1, dtzdx, dtzdy, dtzdl_min, dtzdl_max, fdzdx, fndzdx, ndszdx, ndtzdx)
 	for (GLint part = 0; part < 2; part++) {
 		GLint nb_lines;
-		GLushort* pz1;
-		PIXEL* pp1;
-		GLint error, derror;
-		GLfloat fzl = fz;
-		GLint dx1, dy1, dx2, dy2;
-#if TGL_FEATURE_POLYGON_STIPPLE == 1
-		GLushort the_y;
-#else
-		static const GLushort the_y; //Unused variable necessary 
-#endif
 		{ZBufferPoint *pr1, *pr2, *l1, *l2; //BEGINNING OF LIFETIME FOR ZBUFFERPOINT VARS!!!
 		register GLint update_left, update_right; //update_left decl
 			if (part == 0) {
-				update_left = 1; //update_left first usage.
-				update_right = 1;
-				
-				if (fzl > 0) { //Here! (VALUE_FZ_USED)
+				if (fz > 0) { //Here! (VALUE_FZ_USED)
+					update_left = 1; //update_left first usage.
+					update_right = 1;
 					l1 = p0; //MARK l1 first usage
 					l2 = p2; //MARK l2 first usage
 					pr1 = p0; //MARK first usage of pr1
 					pr2 = p1; //MARK first usage pf pr2
 				} else {
+					update_left = 1; //update_left second usage.
+					update_right = 1;
 					l1 = p0;
 					l2 = p1;
 					pr1 = p0;
 					pr2 = p2;
 				}
-				pp1 = (PIXEL*)(zb->pbuf) + zb->xsize * p0->y; //pp1 first usage
-#if TGL_FEATURE_POLYGON_STIPPLE == 1
-				the_y = p0->y;
-#endif
-				pz1 = zb->zbuf + p0->y * zb->xsize;
 				nb_lines = p1->y - p0->y;
 			} else { //SECOND PART~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 				/* second part */
-				if (fzl > 0) { //fzl last usage (VALUE_FZ_USED)
+				if (fz > 0) { //fz last usage (VALUE_FZ_USED)
 					update_left = 0; //update left third usage.
 					update_right = 1;
 					pr1 = p1;
@@ -236,11 +209,6 @@
 					l1 = p1;
 					l2 = p2;
 				}
-				pp1 = (PIXEL*)(zb->pbuf) + zb->xsize * p0->y + (zb->xsize * (p1->y - p0->y)); //pp1 update to second part.
-#if TGL_FEATURE_POLYGON_STIPPLE == 1
-				the_y = p1->y;
-#endif
-				pz1 = zb->zbuf + p0->y * zb->xsize + (zb->xsize * (p1->y - p0->y));
 				nb_lines = p2->y - p1->y + 1;
 			} //EOF SECOND PART
 
@@ -309,11 +277,8 @@
 		} //End of lifetime for ZBufferpoints
 		/* we draw all the scan line of the part */
 
-		//TODO: omp parallel for
-		//for(GLint q = 0; q < nb_lines; q++)
-		//for(; nb_lines > 0; nb_lines--) //Replaces the while.
-		while(nb_lines>0)
-		{ nb_lines--; //Effectively matching the while.
+		while (nb_lines > 0) {
+			nb_lines--;
 #ifndef DRAW_LINE
 			/* generic draw line */
 			{
--- /dev/null
+++ b/src/ztriangle_tried_openmp_backup.h
@@ -1,0 +1,450 @@
+/*
+ * An eXtReMeLy complicated, delicate, tuned triangle rasterizer
+ * Aight, so basically this is the most complicated code you'll ever read in your life.
+ * The lifetime of variables has been... SUPER Optimized, that's why there's so many random ass curly braces everywhere.
+ * Yes, it is necessary to do that. This code is extremely delicate
+ * and even a minor fuck-up is gonna tank the framerate
+
+Before committing any changes, run gears, model, and texture on your changed code to make sure you didn't
+fuck up!
+
+Things to keep in mind:
+ 1) Tight control of the lifetimes, scopes, and usage of variables lets us use registers more often and memory less
+ 2) Doing the same operation on multiple items is faster than doing different things on different items, generally, because
+   they will be able to take advantage of any/all applicable SIMD/vector ops on your hardware.
+ 3) Divide operations are vastly more expensive than add/sub/bitwise/etc
+ 4) Bit shifting is your friend, it's the fast way to multiply or divide by 2.
+ 5) Fixed point math is used for the depth "z" buffer
+ 6) We're not just using floats for everything because this is still supposed to be fast on platforms without SSE2
+ 7) Fewer variables is usually better
+ 8) All variables that are used inside the rasterizer loop must exist irrespective of feature level at the language level,
+ because OpenMP needs to be able to declare them "private". The compiler will of course optimize out the unused ones.
+ */
+
+#define BEGIN_SEGMENT
+
+{
+	//All these variables are safe to be shared.
+	
+	//GLint dx1, dy1, dx2, dy2;
+	//these VAR VAR_min, and VAR_max must be private.
+	
+	
+
+//#ifdef INTERP_Z
+	
+	GLint dzdx, dzdy;
+//#ifdef INTERP_RGB
+	
+	GLint drdx, drdy;
+	
+	GLint dgdx, dgdy;
+	
+	GLint dbdx, dbdy;
+	
+	GLint dsdx, dsdy;
+	
+	GLint dtdx, dtdy;
+	
+	GLfloat dszdx, dszdy;
+	
+	GLfloat dtzdx, dtzdy; //Not the cause of our problem.
+	GLfloat fdzdx, fndzdx, ndszdx, ndtzdx;
+
+	/* we sort the vertex with increasing y */
+	if (p1->y < p0->y) {
+		ZBufferPoint *t = p0;
+		p0 = p1;
+		p1 = t;
+	}
+	if (p2->y < p0->y) {
+		ZBufferPoint *t = p2;
+		p2 = p1;
+		p1 = p0;
+		p0 = t;
+	} else if (p2->y < p1->y) {
+		ZBufferPoint *t = p1;
+		p1 = p2;
+		p2 = t;
+	}
+GLfloat fz;
+{GLfloat fdx1, fdx2, fdy1, fdy2;
+	/* we compute dXdx and dXdy for all GLinterpolated values */
+	fdx1 = p1->x - p0->x;//fdx1 first usage (VALUE_FDX1_USED)
+	fdy1 = p1->y - p0->y;//fdy1 first usage (VALUE_FDY1_USED)
+
+	fdx2 = p2->x - p0->x;
+	fdy2 = p2->y - p0->y;
+	
+	fz = fdx1 * fdy2 - fdx2 * fdy1;//fz first usage
+	if (fz == 0)
+		return;
+	fz = 1.0 / fz; //value of fz is used (VALUE_FZ_USED)
+	//for these (VALUE_FZ_USED)
+	fdx1 *= fz;
+	fdy1 *= fz;
+	fdx2 *= fz;
+	fdy2 *= fz;
+	//and then
+	{
+	GLfloat d1, d2;
+	#ifdef INTERP_Z
+	{
+		d1 = p1->z - p0->z; //d1 first usage
+		d2 = p2->z - p0->z;
+		dzdx = (GLint)(fdy2 * d1 - fdy1 * d2);
+		dzdy = (GLint)(fdx1 * d2 - fdx2 * d1);
+	}
+	#endif
+
+	#ifdef INTERP_RGB
+	{
+		d1 = p1->r - p0->r;
+		d2 = p2->r - p0->r;
+		drdx = (GLint)(fdy2 * d1 - fdy1 * d2);
+		drdy = (GLint)(fdx1 * d2 - fdx2 * d1);
+	}
+	{
+		d1 = p1->g - p0->g;
+		d2 = p2->g - p0->g;
+		dgdx = (GLint)(fdy2 * d1 - fdy1 * d2);
+		dgdy = (GLint)(fdx1 * d2 - fdx2 * d1);
+	}
+	{
+		d1 = p1->b - p0->b;
+		d2 = p2->b - p0->b;
+		dbdx = (GLint)(fdy2 * d1 - fdy1 * d2);
+		dbdy = (GLint)(fdx1 * d2 - fdx2 * d1);
+	}
+	#endif
+
+	#ifdef INTERP_ST
+	{
+		d1 = p1->s - p0->s;
+		d2 = p2->s - p0->s;
+		dsdx = (GLint)(fdy2 * d1 - fdy1 * d2);
+		dsdy = (GLint)(fdx1 * d2 - fdx2 * d1);
+	}
+	{
+		d1 = p1->t - p0->t;
+		d2 = p2->t - p0->t;
+		dtdx = (GLint)(fdy2 * d1 - fdy1 * d2);
+		dtdy = (GLint)(fdx1 * d2 - fdx2 * d1);
+	}
+	#endif
+
+	#ifdef INTERP_STZ
+		{
+			GLfloat zedzed;
+			zedzed = (GLfloat)p0->z;
+			p0->sz = (GLfloat)p0->s * zedzed;
+			p0->tz = (GLfloat)p0->t * zedzed;
+			zedzed = (GLfloat)p1->z;
+			p1->sz = (GLfloat)p1->s * zedzed;
+			p1->tz = (GLfloat)p1->t * zedzed;
+			zedzed = (GLfloat)p2->z;
+			p2->sz = (GLfloat)p2->s * zedzed;
+			p2->tz = (GLfloat)p2->t * zedzed;
+		}
+		{
+			d1 = p1->sz - p0->sz;
+			d2 = p2->sz - p0->sz;
+			dszdx = (fdy2 * d1 - fdy1 * d2);
+			dszdy = (fdx1 * d2 - fdx2 * d1);
+		}
+		{
+			d1 = p1->tz - p0->tz;
+			d2 = p2->tz - p0->tz;
+			dtzdx = (fdy2 * d1 - fdy1 * d2);
+			dtzdy = (fdx1 * d2 - fdx2  * d1);
+		}
+	#endif
+	}//EOF d1, d2 lifetimes.
+
+}//eof fdx1 fdx2 fdy1 fdy2 lifetimes.
+	/* screen coordinates */
+
+/*
+	pp1 = (PIXEL*)(zb->pbuf) + zb->xsize * p0->y; //pp1 first usage
+#if TGL_FEATURE_POLYGON_STIPPLE == 1
+	the_y = p0->y;
+#endif
+	pz1 = zb->zbuf + p0->y * zb->xsize;
+*/
+	DRAW_INIT();
+//Required reading:
+//http://jakascorner.com/blog/2016/06/omp-data-sharing-attributes.html
+//I'd also like to figure out if the main while() loop over raster lines can be OMP parallelized, but I suspect it isn't worth it.
+#pragma omp parallel for num_threads(2)
+	for (GLint part = 0; part < 2; part++) {
+	//Variables specific to this part of the triangle.
+#define ZEROASSN =0
+//#define ZEROASSN /* a comment*/
+
+		GLint x1 ZEROASSN, dxdy_min ZEROASSN, dxdy_max ZEROASSN;
+		GLint z1 ZEROASSN, dzdl_min ZEROASSN, dzdl_max ZEROASSN;
+		GLint r1 ZEROASSN, drdl_min ZEROASSN, drdl_max ZEROASSN;
+		GLint g1 ZEROASSN,  dgdl_min ZEROASSN, dgdl_max ZEROASSN;
+		GLint b1 ZEROASSN, dbdl_min ZEROASSN, dbdl_max ZEROASSN;
+		GLint s1 ZEROASSN, dsdl_min ZEROASSN, dsdl_max ZEROASSN;
+		GLint t1 ZEROASSN, dtdl_min ZEROASSN, dtdl_max ZEROASSN;
+		GLfloat sz1 ZEROASSN, dszdl_min ZEROASSN, dszdl_max ZEROASSN;
+		GLfloat tz1 ZEROASSN, dtzdl_min ZEROASSN, dtzdl_max ZEROASSN;
+		GLint nb_lines ZEROASSN;
+		GLushort* pz1 ZEROASSN;
+		PIXEL* pp1 ZEROASSN;
+		GLint error ZEROASSN, derror ZEROASSN;
+		GLfloat fzl = fz;
+		GLint dx1 ZEROASSN, dy1 ZEROASSN, dx2 ZEROASSN, dy2 ZEROASSN;
+		/* warning: x2 is multiplied by 2^16 */
+		GLint x2 ZEROASSN, dx2dy2 ZEROASSN;
+#if TGL_FEATURE_POLYGON_STIPPLE == 1
+		GLushort the_y ZEROASSN;
+#endif
+		{ZBufferPoint *pr1, *pr2, *l1, *l2; //BEGINNING OF LIFETIME FOR ZBUFFERPOINT VARS!!!
+		register GLint update_left, update_right; //update_left decl
+			if (part == 0) {
+				update_left = 1; //update_left first usage.
+				update_right = 1;
+				
+				if (fzl > 0) { //Here! (VALUE_FZ_USED)
+					l1 = p0; //MARK l1 first usage
+					l2 = p2; //MARK l2 first usage
+					pr1 = p0; //MARK first usage of pr1
+					pr2 = p1; //MARK first usage pf pr2
+				} else {
+					l1 = p0;
+					l2 = p1;
+					pr1 = p0;
+					pr2 = p2;
+				}
+				pp1 = (PIXEL*)(zb->pbuf) + zb->xsize * p0->y; //pp1 first usage
+#if TGL_FEATURE_POLYGON_STIPPLE == 1
+				the_y = p0->y;
+#endif
+				pz1 = zb->zbuf + p0->y * zb->xsize;
+				nb_lines = p1->y - p0->y;
+			} else { //SECOND PART~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+				/* second part */
+				if (fzl > 0) { //fzl last usage (VALUE_FZ_USED)
+					update_left = 0;
+					update_right = 1;
+					pr1 = p1;
+					pr2 = p2;
+				} else {
+					update_left = 1;
+					update_right = 0;
+					l1 = p1;
+					l2 = p2;
+				}
+				pp1 = (PIXEL*)(zb->pbuf) + zb->xsize * p0->y + (zb->xsize * (p1->y - p0->y)); //pp1 update to second part.
+#if TGL_FEATURE_POLYGON_STIPPLE == 1
+				the_y = p1->y;
+#endif
+				pz1 = zb->zbuf + p0->y * zb->xsize + (zb->xsize * (p1->y - p0->y));
+				nb_lines = p2->y - p1->y + 1;
+			} //EOF SECOND PART
+
+			/* compute the values for the left edge */
+			//pr1 and pr2 are not used inside this area.
+			if (update_left)
+			//if (1) 
+			{
+				{
+					register GLint tmp;
+					dy1 = l2->y - l1->y;
+					dx1 = l2->x - l1->x;
+					if (dy1 > 0)
+						tmp = (dx1 << 16) / dy1; 
+					else
+						tmp = 0;
+					x1 = l1->x;
+					error = 0;
+					derror = tmp & 0x0000ffff;
+					dxdy_min = tmp >> 16;
+				}
+				dxdy_max = dxdy_min + 1;
+#ifdef INTERP_Z
+				z1 = l1->z;
+				dzdl_min = (dzdy + dzdx * dxdy_min);
+				dzdl_max = dzdl_min + dzdx;
+#endif
+#ifdef INTERP_RGB
+				r1 = l1->r;
+				drdl_min = (drdy + drdx * dxdy_min);
+				drdl_max = drdl_min + drdx;
+				g1 = l1->g;
+				dgdl_min = (dgdy + dgdx * dxdy_min);
+				dgdl_max = dgdl_min + dgdx;
+				b1 = l1->b;
+				dbdl_min = (dbdy + dbdx * dxdy_min);
+				dbdl_max = dbdl_min + dbdx;
+#endif
+#ifdef INTERP_ST
+				s1 = l1->s;
+				dsdl_min = (dsdy + dsdx * dxdy_min);
+				dsdl_max = dsdl_min + dsdx;
+				t1 = l1->t;
+				dtdl_min = (dtdy + dtdx * dxdy_min);
+				dtdl_max = dtdl_min + dtdx;
+#endif
+#ifdef INTERP_STZ
+				sz1 = l1->sz;
+				dszdl_min = (dszdy + dszdx * dxdy_min);
+				dszdl_max = dszdl_min + dszdx;
+				tz1 = l1->tz;
+				dtzdl_min = (dtzdy + dtzdx * dxdy_min);
+				dtzdl_max = dtzdl_min + dtzdx;
+#endif
+			} //EOF update left
+			//Is l1 used after update_left?
+			/* compute values for the right edge */
+
+			if (update_right) 
+			//if (1) 
+			{ //Update right tested
+				dx2 = (pr2->x - pr1->x);
+				dy2 = (pr2->y - pr1->y); //LAST USAGE OF PR2
+				if (dy2 > 0)
+					dx2dy2 = (dx2 << 16) / dy2;
+				else
+					dx2dy2 = 0;
+				x2 = pr1->x << 16; //LAST USAGE OF PR1
+			} //EOF update right
+		} //End of lifetime for ZBufferpoints
+		/* we draw all the scan line of the part */
+
+		//TODO: omp parallel for
+		//for(GLint q = 0; q < nb_lines; q++)
+		//for(; nb_lines > 0; nb_lines--) //Replaces the while.
+		while(nb_lines>0)
+		{ nb_lines--; //Effectively matching the while.
+#ifndef DRAW_LINE
+			/* generic draw line */
+			{
+				register PIXEL* pp;
+				register GLint n;
+#ifdef INTERP_Z
+				register GLushort* pz;
+				register GLuint z;
+#endif
+#ifdef INTERP_RGB
+				register GLint or1, og1, ob1;
+#endif
+#ifdef INTERP_ST
+				register GLuint s, t;
+#endif
+#ifdef INTERP_STZ
+				//GLfloat sz, tz; //These variables go unused in this draw line function.
+#endif
+
+				n = (x2 >> 16) - x1;
+				//pp = (PIXEL*)((GLbyte*)pp1 + x1 * PS_ZB);
+				pp = (PIXEL*)pp1 + x1;
+#ifdef INTERP_Z
+				pz = pz1 + x1;
+				z = z1;
+#endif
+#ifdef INTERP_RGB
+				or1 = r1;
+				og1 = g1;
+				ob1 = b1;
+#endif
+#ifdef INTERP_ST
+				s = s1;
+				t = t1;
+#endif
+#ifdef INTERP_STZ
+//				sz = sz1; //What is SZ used for?
+//				tz = tz1; //What is TZ used for?
+#endif
+				while (n >= 3) {
+					PUT_PIXEL(0); /*the_x++;*/
+					PUT_PIXEL(1); /*the_x++;*/
+					PUT_PIXEL(2); /*the_x++;*/
+					PUT_PIXEL(3); /*the_x++;*/
+#ifdef INTERP_Z
+					pz += 4;
+#endif
+//					pp = (PIXEL*)((GLbyte*)pp + 4 * PS_ZB);
+					pp += 4;
+					n -= 4;
+				}
+				while (n >= 0) {
+					PUT_PIXEL(0); /*the_x++;*/
+#ifdef INTERP_Z
+					//pz += 1;
+					pz++;
+#endif
+					/*pp = (PIXEL*)((GLbyte*)pp + PS_ZB);*/
+					pp++;
+					n--;
+				}
+			}
+#else
+			DRAW_LINE(); 
+#endif
+
+			/* left edge */
+			error += derror;
+			if (error > 0) {
+				error -= 0x10000;
+				x1 += dxdy_max;
+#ifdef INTERP_Z
+				z1 += dzdl_max;
+#endif
+#ifdef INTERP_RGB
+				r1 += drdl_max;
+				g1 += dgdl_max;
+				b1 += dbdl_max;
+#endif
+#ifdef INTERP_ST
+				s1 += dsdl_max;
+				t1 += dtdl_max;
+#endif
+#ifdef INTERP_STZ
+				sz1 += dszdl_max;
+				tz1 += dtzdl_max;
+#endif
+			} else {
+				x1 += dxdy_min;
+#ifdef INTERP_Z
+				z1 += dzdl_min;
+#endif
+#ifdef INTERP_RGB
+				r1 += drdl_min;
+				g1 += dgdl_min;
+				b1 += dbdl_min;
+#endif
+#ifdef INTERP_ST
+				s1 += dsdl_min;
+				t1 += dtdl_min;
+#endif
+#ifdef INTERP_STZ
+				sz1 += dszdl_min;
+				tz1 += dtzdl_min;
+#endif
+			}
+
+			/* right edge */
+			x2 += dx2dy2;
+
+			/* screen coordinates */
+			//pp1 = (PIXEL*)((GLbyte*)pp1 + zb->linesize);
+			pp1 += zb->xsize;
+#if TGL_FEATURE_POLYGON_STIPPLE == 1
+			the_y++;
+#endif
+			pz1 += zb->xsize;
+		}
+	}
+}
+
+#undef INTERP_Z
+#undef INTERP_RGB
+#undef INTERP_ST
+#undef INTERP_STZ
+
+#undef DRAW_INIT
+#undef DRAW_LINE
+#undef PUT_PIXEL