ref: d84b421997defe3e962878949039dba0002faa34
parent: 2ff44fe298a8040c0b0870915939ea3c1fc978a2
author: MHS <gek@katherine>
date: Tue Mar 23 10:24:11 EDT 2021
Automatic commit.
--- a/include/zfeatures.h
+++ b/include/zfeatures.h
@@ -75,7 +75,11 @@
#define TGL_FEATURE_MULTITHREADED_ZB_COPYBUFFER 0
//Enable stdalign
#define TGL_FEATURE_ALIGNAS 1
-
+//Optimization hint: cost of branching.
+//0 - Branching has zero cost; avoid extraneous code.
+//1 - Branching has some cost; allow some extraneous code.
+//2 - Branching has extreme cost; allow a lot of extraneous code. Modern processors work best on this setting.
+#define TGL_OPTIMIZATION_HINT_BRANCH_COST 2
//Disable it for TinyC
#ifdef __TINYC__
--- a/src/clip.c
+++ b/src/clip.c
@@ -20,13 +20,16 @@
v->zp.z = (GLint)(v->pc.Z * winv * c->viewport.scale.Z + c->viewport.trans.Z);
}
/* color */
- v->zp.r = (GLint)(v->color.v[0] * COLOR_CORRECTED_MULT_MASK + COLOR_MIN_MULT) & COLOR_MASK;
- v->zp.g = (GLint)(v->color.v[1] * COLOR_CORRECTED_MULT_MASK + COLOR_MIN_MULT) & COLOR_MASK;
- v->zp.b = (GLint)(v->color.v[2] * COLOR_CORRECTED_MULT_MASK + COLOR_MIN_MULT) & COLOR_MASK;
+ v->zp.r = (GLuint)(v->color.v[0] * COLOR_CORRECTED_MULT_MASK + COLOR_MIN_MULT) & COLOR_MASK;
+ v->zp.g = (GLuint)(v->color.v[1] * COLOR_CORRECTED_MULT_MASK + COLOR_MIN_MULT) & COLOR_MASK;
+ v->zp.b = (GLuint)(v->color.v[2] * COLOR_CORRECTED_MULT_MASK + COLOR_MIN_MULT) & COLOR_MASK;
/* texture */
- if (c->texture_2d_enabled) {
+#if TGL_OPTIMIZATION_HINT_BRANCH_COST < 1
+ if (c->texture_2d_enabled)
+#endif
+ {
v->zp.s = (GLint)(v->tex_coord.X * (ZB_POINT_S_MAX - ZB_POINT_S_MIN) + ZB_POINT_S_MIN); //MARKED
v->zp.t = (GLint)(v->tex_coord.Y * (ZB_POINT_T_MAX - ZB_POINT_T_MIN) + ZB_POINT_T_MIN); //MARKED
}
@@ -138,7 +141,7 @@
q->pc.Y = p0->pc.Y + (p1->pc.Y - p0->pc.Y) * t;
q->pc.Z = p0->pc.Z + (p1->pc.Z - p0->pc.Z) * t;
q->pc.W = p0->pc.W + (p1->pc.W - p0->pc.W) * t;
-
+#pragma omp simd
for(int i = 0; i < 3; i++)
q->color.v[i] = p0->color.v[i] + (p1->color.v[i] - p0->color.v[i]) * t;
}
--- a/src/get.c
+++ b/src/get.c
@@ -56,6 +56,7 @@
const GLubyte* extensions_string = (const GLubyte*)"TGL_TEXTURE "
"TGL_SMOOTHSHADING "
"TGL_LIGHTING "
+"TGL_OPTIMIZATION_HINT_BRANCH_COST=" xstr(TGL_OPTIMIZATION_HINT_BRANCH_COST) " "
#if TGL_FEATURE_ERROR_CHECK == 1
"TGL_FEATURE_ERROR_CHECK "
#endif
--- a/src/light.c
+++ b/src/light.c
@@ -280,7 +280,7 @@
d.Y = l->position.v[1] - v->ec.v[1];
d.Z = l->position.v[2] - v->ec.v[2];
#if TGL_FEATURE_FISR == 1
- tmp = clampf(fastInvSqrt(d.X * d.X + d.Y * d.Y + d.Z * d.Z),0,1); //FISR IMPL, MATCHED!
+ tmp = fastInvSqrt(d.X * d.X + d.Y * d.Y + d.Z * d.Z); //FISR IMPL, MATCHED!
{
d.X *= tmp;
d.Y *= tmp;
@@ -354,7 +354,7 @@
#endif
dot_spec = clampf(dot_spec, 0, 1);
#if TGL_FEATURE_FISR == 1
- tmp = clampf(fastInvSqrt(s.X * s.X + s.Y * s.Y + s.Z * s.Z),0,1); //FISR IMPL, MATCHED!
+ tmp = fastInvSqrt(s.X * s.X + s.Y * s.Y + s.Z * s.Z); //FISR IMPL, MATCHED!
//if (tmp < 1E+3)
{
dot_spec = dot_spec * tmp;
--- a/src/vertex.c
+++ b/src/vertex.c
@@ -68,15 +68,15 @@
gl_M4_Inv(&tmp, c->matrix_stack_ptr[0]);
gl_M4_Transpose(&c->matrix_model_view_inv, &tmp);
} else {
- //GLfloat* m = &c->matrix_model_projection.m[0][0];
+ GLfloat* m = &c->matrix_model_projection.m[0][0];
/* precompute projection matrix */
gl_M4_Mul(&c->matrix_model_projection, c->matrix_stack_ptr[1], c->matrix_stack_ptr[0]);
/* test to accelerate computation */
c->matrix_model_projection_no_w_transform = 0;
- //if (m[12] == 0.0 && m[13] == 0.0 && m[14] == 0.0)
- if(c->matrix_model_projection.m[3][0] == 0.0 &&
- c->matrix_model_projection.m[3][1] == 0.0 &&
- c->matrix_model_projection.m[3][2] == 0.0)
+ if (m[12] == 0.0 && m[13] == 0.0 && m[14] == 0.0)
+ //if(c->matrix_model_projection.m[3][0] == 0.0 &&
+ // c->matrix_model_projection.m[3][1] == 0.0 &&
+ // c->matrix_model_projection.m[3][2] == 0.0)
c->matrix_model_projection_no_w_transform = 1;
}
@@ -137,13 +137,15 @@
v->zp.z = (GLint)(v->pc.Z * winv * c->viewport.scale.Z + c->viewport.trans.Z);
}
- v->zp.r = (GLint)(v->color.v[0] * COLOR_CORRECTED_MULT_MASK + COLOR_MIN_MULT) & COLOR_MASK;
- v->zp.g = (GLint)(v->color.v[1] * COLOR_CORRECTED_MULT_MASK + COLOR_MIN_MULT) & COLOR_MASK;
- v->zp.b = (GLint)(v->color.v[2] * COLOR_CORRECTED_MULT_MASK + COLOR_MIN_MULT) & COLOR_MASK;
+ v->zp.r = (GLuint)(v->color.v[0] * COLOR_CORRECTED_MULT_MASK + COLOR_MIN_MULT) & COLOR_MASK;
+ v->zp.g = (GLuint)(v->color.v[1] * COLOR_CORRECTED_MULT_MASK + COLOR_MIN_MULT) & COLOR_MASK;
+ v->zp.b = (GLuint)(v->color.v[2] * COLOR_CORRECTED_MULT_MASK + COLOR_MIN_MULT) & COLOR_MASK;
-
- if (c->texture_2d_enabled) {
+//#if TGL_OPTIMIZATION_HINT_BRANCH_COST < 1
+ if (c->texture_2d_enabled)
+//#endif
+ {
v->zp.s = (GLint)(v->tex_coord.X * (ZB_POINT_S_MAX - ZB_POINT_S_MIN) + ZB_POINT_S_MIN); //MARKED
v->zp.t = (GLint)(v->tex_coord.Y * (ZB_POINT_T_MAX - ZB_POINT_T_MIN) + ZB_POINT_T_MIN); //MARKED
}
@@ -153,7 +155,10 @@
GLfloat* m;
GLContext* c = gl_get_context();
- if (c->lighting_enabled) {
+//#if TGL_OPTIMIZATION_HINT_BRANCH_COST < 2
+ if (c->lighting_enabled)
+//#endif
+ {
/* eye coordinates needed for lighting */
V4* n;
m = &c->matrix_stack_ptr[0]->m[0][0];
@@ -179,7 +184,9 @@
if (c->normalize_enabled) {
gl_V3_Norm_Fast(&v->normal);
}
- } else {
+ }
+//#if TGL_OPTIMIZATION_HINT_BRANCH_COST < 2
+ else {
/* no eye coordinates needed, no normal */
/* NOTE: W = 1 is assumed */
m = &c->matrix_model_projection.m[0][0];
@@ -193,7 +200,7 @@
v->pc.W = (v->coord.X * m[12] + v->coord.Y * m[13] + v->coord.Z * m[14] + m[15]);
}
}
-
+//#endif
v->clip_code = gl_clipcode(v->pc.X, v->pc.Y, v->pc.Z, v->pc.W);
}
--- a/src/zmath.c
+++ b/src/zmath.c
@@ -9,6 +9,7 @@
/* ******* Gestion des matrices 4x4 ****** */
void gl_M4_Id(M4* a) {
+/*
GLint i, j;
#pragma omp simd collapse(2)
for (i = 0; i < 4; i++)
@@ -17,6 +18,14 @@
a->m[i][j] = 1.0;
else
a->m[i][j] = 0.0;
+*/
+const M4 c = (M4){{
+ {1,0,0,0},
+ {0,1,0,0},
+ {0,0,1,0},
+ {0,0,0,1},}
+ };
+*a = c;
}
GLint gl_M4_IsId(M4* a) {
@@ -211,7 +220,7 @@
void gl_M4_Inv(M4* a, M4* b) {
M4 tmp;
- memcpy(&tmp, b, 16 * sizeof(GLfloat));
+ memcpy(&tmp, b, sizeof(M4));
/*tmp=*b;*/
Matrix_Inv(&a->m[0][0], &tmp.m[0][0], 4);
}
--- a/src/zmath.h
+++ b/src/zmath.h
@@ -88,7 +88,7 @@
#endif
-static inline GLint gl_V3_Norm_Fast(V3* a) {
+static inline int gl_V3_Norm_Fast(V3* a) {
GLfloat n;
#if TGL_FEATURE_FISR == 1
n = fastInvSqrt(a->X * a->X + a->Y * a->Y + a->Z * a->Z); //FISR