shithub: tinygl

Download patch

ref: d84b421997defe3e962878949039dba0002faa34
parent: 2ff44fe298a8040c0b0870915939ea3c1fc978a2
author: MHS <gek@katherine>
date: Tue Mar 23 10:24:11 EDT 2021

Automatic commit.

--- a/include/zfeatures.h
+++ b/include/zfeatures.h
@@ -75,7 +75,11 @@
 #define TGL_FEATURE_MULTITHREADED_ZB_COPYBUFFER 0
 //Enable stdalign
 #define TGL_FEATURE_ALIGNAS 1
-
+//Optimization hint: the assumed cost of branching on the target processor.
+//0 - Branching has zero cost; avoid extraneous code.
+//1 - Branching has some cost; allow some extraneous code.
+//2 - Branching has extreme cost; allow a lot of extraneous code. Modern processors work best on this setting.
+#define TGL_OPTIMIZATION_HINT_BRANCH_COST 2
 
 //Disable it for TinyC
 #ifdef __TINYC__
--- a/src/clip.c
+++ b/src/clip.c
@@ -20,13 +20,16 @@
 		v->zp.z = (GLint)(v->pc.Z * winv * c->viewport.scale.Z + c->viewport.trans.Z);
 	}
 	/* color */
-	v->zp.r = (GLint)(v->color.v[0] * COLOR_CORRECTED_MULT_MASK + COLOR_MIN_MULT) & COLOR_MASK;
-	v->zp.g = (GLint)(v->color.v[1] * COLOR_CORRECTED_MULT_MASK + COLOR_MIN_MULT) & COLOR_MASK;
-	v->zp.b = (GLint)(v->color.v[2] * COLOR_CORRECTED_MULT_MASK + COLOR_MIN_MULT) & COLOR_MASK;
+	v->zp.r = (GLuint)(v->color.v[0] * COLOR_CORRECTED_MULT_MASK + COLOR_MIN_MULT) & COLOR_MASK;
+	v->zp.g = (GLuint)(v->color.v[1] * COLOR_CORRECTED_MULT_MASK + COLOR_MIN_MULT) & COLOR_MASK;
+	v->zp.b = (GLuint)(v->color.v[2] * COLOR_CORRECTED_MULT_MASK + COLOR_MIN_MULT) & COLOR_MASK;
 
 	/* texture */
 
-	if (c->texture_2d_enabled) {
+#if TGL_OPTIMIZATION_HINT_BRANCH_COST < 1
+	if (c->texture_2d_enabled) 
+#endif
+	{
 		v->zp.s = (GLint)(v->tex_coord.X * (ZB_POINT_S_MAX - ZB_POINT_S_MIN) + ZB_POINT_S_MIN); //MARKED
 		v->zp.t = (GLint)(v->tex_coord.Y * (ZB_POINT_T_MAX - ZB_POINT_T_MIN) + ZB_POINT_T_MIN); //MARKED
 	}
@@ -138,7 +141,7 @@
 	q->pc.Y = p0->pc.Y + (p1->pc.Y - p0->pc.Y) * t;
 	q->pc.Z = p0->pc.Z + (p1->pc.Z - p0->pc.Z) * t;
 	q->pc.W = p0->pc.W + (p1->pc.W - p0->pc.W) * t;
-
+#pragma omp simd
 	for(int i = 0; i < 3; i++)
 		q->color.v[i] = p0->color.v[i] + (p1->color.v[i] - p0->color.v[i]) * t;
 }
--- a/src/get.c
+++ b/src/get.c
@@ -56,6 +56,7 @@
 const GLubyte* extensions_string = (const GLubyte*)"TGL_TEXTURE "
 "TGL_SMOOTHSHADING "
 "TGL_LIGHTING "
+"TGL_OPTIMIZATION_HINT_BRANCH_COST=" xstr(TGL_OPTIMIZATION_HINT_BRANCH_COST) " "
 #if TGL_FEATURE_ERROR_CHECK == 1
 "TGL_FEATURE_ERROR_CHECK "
 #endif 
--- a/src/light.c
+++ b/src/light.c
@@ -280,7 +280,7 @@
 			d.Y = l->position.v[1] - v->ec.v[1];
 			d.Z = l->position.v[2] - v->ec.v[2];
 #if TGL_FEATURE_FISR == 1
-			tmp = clampf(fastInvSqrt(d.X * d.X + d.Y * d.Y + d.Z * d.Z),0,1); //FISR IMPL, MATCHED!
+			tmp = fastInvSqrt(d.X * d.X + d.Y * d.Y + d.Z * d.Z); //FISR IMPL, MATCHED!
 			{
 				d.X *= tmp;
 				d.Y *= tmp;
@@ -354,7 +354,7 @@
 #endif
 					dot_spec = clampf(dot_spec, 0, 1);
 #if TGL_FEATURE_FISR == 1
-					tmp = clampf(fastInvSqrt(s.X * s.X + s.Y * s.Y + s.Z * s.Z),0,1); //FISR IMPL, MATCHED!
+					tmp = fastInvSqrt(s.X * s.X + s.Y * s.Y + s.Z * s.Z); //FISR IMPL, MATCHED!
 					//if (tmp < 1E+3) 
 					{
 						dot_spec = dot_spec * tmp;
--- a/src/vertex.c
+++ b/src/vertex.c
@@ -68,15 +68,15 @@
 			gl_M4_Inv(&tmp, c->matrix_stack_ptr[0]);
 			gl_M4_Transpose(&c->matrix_model_view_inv, &tmp);
 		} else {
-			//GLfloat* m = &c->matrix_model_projection.m[0][0];
+			GLfloat* m = &c->matrix_model_projection.m[0][0];
 			/* precompute projection matrix */
 			gl_M4_Mul(&c->matrix_model_projection, c->matrix_stack_ptr[1], c->matrix_stack_ptr[0]);
 			/* test to accelerate computation */
 			c->matrix_model_projection_no_w_transform = 0;
-			//if (m[12] == 0.0 && m[13] == 0.0 && m[14] == 0.0)
-			if(c->matrix_model_projection.m[3][0] == 0.0 &&
-				c->matrix_model_projection.m[3][1] == 0.0 &&
-				c->matrix_model_projection.m[3][2] == 0.0)
+			if (m[12] == 0.0 && m[13] == 0.0 && m[14] == 0.0)
+			//if(c->matrix_model_projection.m[3][0] == 0.0 &&
+			//	c->matrix_model_projection.m[3][1] == 0.0 &&
+			//	c->matrix_model_projection.m[3][2] == 0.0)
 				c->matrix_model_projection_no_w_transform = 1;
 		}
 
@@ -137,13 +137,15 @@
 		v->zp.z = (GLint)(v->pc.Z * winv * c->viewport.scale.Z + c->viewport.trans.Z);
 	}
 	
-	v->zp.r = (GLint)(v->color.v[0] * COLOR_CORRECTED_MULT_MASK + COLOR_MIN_MULT) & COLOR_MASK;
-	v->zp.g = (GLint)(v->color.v[1] * COLOR_CORRECTED_MULT_MASK + COLOR_MIN_MULT) & COLOR_MASK;
-	v->zp.b = (GLint)(v->color.v[2] * COLOR_CORRECTED_MULT_MASK + COLOR_MIN_MULT) & COLOR_MASK;
+	v->zp.r = (GLuint)(v->color.v[0] * COLOR_CORRECTED_MULT_MASK + COLOR_MIN_MULT) & COLOR_MASK;
+	v->zp.g = (GLuint)(v->color.v[1] * COLOR_CORRECTED_MULT_MASK + COLOR_MIN_MULT) & COLOR_MASK;
+	v->zp.b = (GLuint)(v->color.v[2] * COLOR_CORRECTED_MULT_MASK + COLOR_MIN_MULT) & COLOR_MASK;
 
 	
-
-	if (c->texture_2d_enabled) {
+//#if TGL_OPTIMIZATION_HINT_BRANCH_COST < 1
+	if (c->texture_2d_enabled) 
+//#endif
+	{
 		v->zp.s = (GLint)(v->tex_coord.X * (ZB_POINT_S_MAX - ZB_POINT_S_MIN) + ZB_POINT_S_MIN); //MARKED
 		v->zp.t = (GLint)(v->tex_coord.Y * (ZB_POINT_T_MAX - ZB_POINT_T_MIN) + ZB_POINT_T_MIN); //MARKED
 	}
@@ -153,7 +155,10 @@
 	GLfloat* m;
 	GLContext* c = gl_get_context();
 
-	if (c->lighting_enabled) {
+//#if TGL_OPTIMIZATION_HINT_BRANCH_COST < 2
+	if (c->lighting_enabled) 
+//#endif
+	{
 		/* eye coordinates needed for lighting */
 		V4* n;
 		m = &c->matrix_stack_ptr[0]->m[0][0];
@@ -179,7 +184,9 @@
 		if (c->normalize_enabled) {
 			gl_V3_Norm_Fast(&v->normal);
 		}
-	} else {
+	} 
+//#if TGL_OPTIMIZATION_HINT_BRANCH_COST < 2
+	else {
 		/* no eye coordinates needed, no normal */
 		/* NOTE: W = 1 is assumed */
 		m = &c->matrix_model_projection.m[0][0];
@@ -193,7 +200,7 @@
 			v->pc.W = (v->coord.X * m[12] + v->coord.Y * m[13] + v->coord.Z * m[14] + m[15]);
 		}
 	}
-
+//#endif
 	v->clip_code = gl_clipcode(v->pc.X, v->pc.Y, v->pc.Z, v->pc.W);
 }
 
--- a/src/zmath.c
+++ b/src/zmath.c
@@ -9,6 +9,7 @@
 /* ******* Gestion des matrices 4x4 ****** */
 
 void gl_M4_Id(M4* a) {
+/*
 	GLint i, j;
 #pragma omp simd collapse(2)
 	for (i = 0; i < 4; i++)
@@ -17,6 +18,14 @@
 				a->m[i][j] = 1.0;
 			else
 				a->m[i][j] = 0.0;
+*/
+const M4 c = (M4){{
+	{1,0,0,0},
+	{0,1,0,0},
+	{0,0,1,0},
+	{0,0,0,1},}
+	};
+*a = c;
 }
 
 GLint gl_M4_IsId(M4* a) {
@@ -211,7 +220,7 @@
 
 void gl_M4_Inv(M4* a, M4* b) {
 	M4 tmp;
-	memcpy(&tmp, b, 16 * sizeof(GLfloat));
+	memcpy(&tmp, b, sizeof(M4));
 	/*tmp=*b;*/
 	Matrix_Inv(&a->m[0][0], &tmp.m[0][0], 4);
 }
--- a/src/zmath.h
+++ b/src/zmath.h
@@ -88,7 +88,7 @@
 #endif
 
 
-static inline GLint gl_V3_Norm_Fast(V3* a) {
+static inline int gl_V3_Norm_Fast(V3* a) {
 	GLfloat n;
 #if TGL_FEATURE_FISR == 1
 	n = fastInvSqrt(a->X * a->X + a->Y * a->Y + a->Z * a->Z); //FISR