ref: 54e149ceacdc7a914c1610f2013d79d21a9ae264
parent: b70c9722405939a40ba894d1e734dea355088ffc
author: David <gek@katherine>
date: Thu Feb 18 20:46:46 EST 2021
Fast Inverse Square Root
--- a/SDL_Examples/gears.c
+++ b/SDL_Examples/gears.c
@@ -268,6 +268,15 @@
// glEnable( GL_NORMALIZE );
}
+/*
+ * Scratch copy of fastInvSqrt() from src/zmath.h; its only visible call
+ * site in this file is commented out.
+ * Approximates 1/sqrt(x): the float's bits are reinterpreted as an integer,
+ * the 0x5f3759df constant produces a first guess, and one Newton-Raphson
+ * step refines it. x is expected to be positive.
+ */
+static inline GLfloat TEST_fastInvSqrt(float x){
+    GLint i; GLfloat y;
+    /* memcpy reinterprets the bits without violating strict aliasing */
+    memcpy(&i, &x, sizeof x);
+    i = 0x5f3759df - (i>>1); /* initial guess */
+    memcpy(&y, &i, sizeof y);
+    return y * (1.5F - 0.5F * x * y * y); /* one Newton-Raphson iteration */
+}
+
int main(int argc, char** argv) {
// initialize SDL video:
int winSizeX = 640;
@@ -420,9 +429,12 @@
// main loop:
int isRunning = 1;
+ //float test = 0;
while (isRunning) {
++frames;
tNow = SDL_GetTicks();
+ // test = TEST_fastInvSqrt(tNow);
+ // printf("\n%f",test);
// do event handling:
SDL_Event evt;
while (SDL_PollEvent(&evt))
--- a/src/clip.c
+++ b/src/clip.c
@@ -67,9 +67,11 @@
q->pc.Z = p0->pc.Z + (p1->pc.Z - p0->pc.Z) * t;
q->pc.W = p0->pc.W + (p1->pc.W - p0->pc.W) * t;
- q->color.v[0] = p0->color.v[0] + (p1->color.v[0] - p0->color.v[0]) * t;
- q->color.v[1] = p0->color.v[1] + (p1->color.v[1] - p0->color.v[1]) * t;
- q->color.v[2] = p0->color.v[2] + (p1->color.v[2] - p0->color.v[2]) * t;
+ for(int i = 0; i < 3; i++)
+ q->color.v[i] = p0->color.v[i] + (p1->color.v[i] - p0->color.v[i]) * t;
+// q->color.v[0] = p0->color.v[0] + (p1->color.v[0] - p0->color.v[0]) * t;
+// q->color.v[1] = p0->color.v[1] + (p1->color.v[1] - p0->color.v[1]) * t;
+// q->color.v[2] = p0->color.v[2] + (p1->color.v[2] - p0->color.v[2]) * t;
}
/*
--- a/src/light.c
+++ b/src/light.c
@@ -115,7 +115,8 @@
l->norm_position.X = pos.X;
l->norm_position.Y = pos.Y;
l->norm_position.Z = pos.Z;
- gl_V3_Norm(&l->norm_position);
+ //gl_V3_Norm(&l->norm_position);
+ gl_V3_Norm_Fast(&l->norm_position);
}
} break;
case GL_SPOT_DIRECTION:
@@ -123,7 +124,7 @@
l->spot_direction.v[i] = v.v[i];
l->norm_spot_direction.v[i] = v.v[i];
}
- gl_V3_Norm(&l->norm_spot_direction);
+ gl_V3_Norm_Fast(&l->norm_spot_direction);
break;
case GL_SPOT_EXPONENT:
l->spot_exponent = v.v[0];
@@ -242,7 +243,9 @@
d.X = l->position.v[0] - v->ec.v[0];
d.Y = l->position.v[1] - v->ec.v[1];
d.Z = l->position.v[2] - v->ec.v[2];
- dist = sqrt(d.X * d.X + d.Y * d.Y + d.Z * d.Z);
+ tmp = fastInvSqrt(d.X * d.X + d.Y * d.Y + d.Z * d.Z);
+ //dist = sq_rt(d.X * d.X + d.Y * d.Y + d.Z * d.Z);
+ dist = 1.0f/tmp;
if (dist > 1E-3) {
tmp = 1 / dist;
d.X *= tmp;
@@ -285,7 +288,8 @@
vcoord.X = v->ec.X;
vcoord.Y = v->ec.Y;
vcoord.Z = v->ec.Z;
- gl_V3_Norm(&vcoord);
+ //gl_V3_Norm(&vcoord);
+ gl_V3_Norm_Fast(&vcoord);
s.X = d.X - vcoord.X;
s.Y = d.Y - vcoord.X;
s.Z = d.Z - vcoord.X;
@@ -300,10 +304,12 @@
if (dot_spec > 0) {
GLSpecBuf* specbuf;
GLint idx;
- tmp = sqrt(s.X * s.X + s.Y * s.Y + s.Z * s.Z);
- if (tmp > 1E-3) {
- dot_spec = dot_spec / tmp;
- }
+ //tmp = sqrt(s.X * s.X + s.Y * s.Y + s.Z * s.Z);
+ tmp = fastInvSqrt(s.X * s.X + s.Y * s.Y + s.Z * s.Z);
+ if (tmp < 1E+3) {
+// dot_spec = dot_spec / tmp;
+ dot_spec = dot_spec * tmp;
+ } else dot_spec = 0;
/* TODO: optimize */
/* testing specular buffer code */
@@ -311,7 +317,7 @@
specbuf = specbuf_get_buffer(c, m->shininess_i, m->shininess);
idx = (GLint)(dot_spec * SPECULAR_BUFFER_SIZE);
if (idx > SPECULAR_BUFFER_SIZE)
- idx = SPECULAR_BUFFER_SIZE;
+ idx = SPECULAR_BUFFER_SIZE; //NOTE by GEK: this is poorly written, it's actually 1 larger.
dot_spec = specbuf->buf[idx];
lR += dot_spec * l->specular.v[0] * m->specular.v[0];
lG += dot_spec * l->specular.v[1] * m->specular.v[1];
--- a/src/matrix.c
+++ b/src/matrix.c
@@ -95,6 +95,8 @@
gl_matrix_update(c);
}
+
+
void glopRotate(GLContext* c, GLParam* p) {
M4 m;
GLfloat u[3];
@@ -132,14 +134,18 @@
GLfloat cost, sint;
/* normalize vector */
- GLfloat len = u[0] * u[0] + u[1] * u[1] + u[2] * u[2];
+ GLfloat len = u[0] * u[0] + u[1] * u[1] + u[2] * u[2]; /* squared length: fastInvSqrt() below needs |u|^2, not the component sum */
if (len == 0.0f)
return;
- len = 1.0f / sqrt(len);
+/*OLD
+
+*/
+//NEW
+ len = fastInvSqrt(len);
+ //len = 1.0f / sqrt(len);
u[0] *= len;
u[1] *= len;
u[2] *= len;
-
/* store cos and sin values */
cost = cos(angle);
sint = sin(angle);
--- a/src/vertex.c
+++ b/src/vertex.c
@@ -159,7 +159,7 @@
v->normal.Z = (n->X * m[8] + n->Y * m[9] + n->Z * m[10]);
if (c->normalize_enabled) {
- gl_V3_Norm(&v->normal);
+ gl_V3_Norm_Fast(&v->normal);
}
} else {
/* no eye coordinates needed, no normal */
--- a/src/zmath.c
+++ b/src/zmath.c
@@ -134,7 +134,7 @@
GLint i, j, k, l;
GLfloat max, tmp, t;
- /* identit�e dans r */
+ /* identity matrix in r */
for (i = 0; i < n * n; i++)
r[i] = 0;
for (i = 0; i < n; i++)
@@ -235,6 +235,20 @@
/* vector arithmetic */
+//NEW
+
+/*
+ * Normalize *a in place using the fastInvSqrt() approximation.
+ * Returns 1 and leaves *a untouched when its squared length is exactly
+ * zero; otherwise scales the vector and returns 0.
+ */
+int gl_V3_Norm_Fast(V3* a) {
+    GLfloat len2, n;
+    len2 = a->X * a->X + a->Y * a->Y + a->Z * a->Z;
+    /*
+     * Test the squared length, not the inverse root: fastInvSqrt(0) yields
+     * a large finite value, so comparing its result against 0 never fires.
+     */
+    if (len2 == 0)
+        return 1;
+    n = fastInvSqrt(len2);
+    a->X *= n;
+    a->Y *= n;
+    a->Z *= n;
+    return 0;
+}
+
+// OLD
int gl_V3_Norm(V3* a) {
GLfloat n;
n = sqrt(a->X * a->X + a->Y * a->Y + a->Z * a->Z);
--- a/src/zmath.h
+++ b/src/zmath.h
@@ -1,6 +1,8 @@
#ifndef __ZMATH__
#define __ZMATH__
#include "../include/GL/gl.h"
+#include <stdlib.h>
+#include <string.h> //For memcpy
/* Matrix & Vertex */
typedef struct {
@@ -43,6 +45,7 @@
void gl_M4_Transpose(M4* a, M4* b);
void gl_M4_Rotate(M4* c, GLfloat t, GLint u);
int gl_V3_Norm(V3* a);
+int gl_V3_Norm_Fast(V3* a);
V3 gl_V3_New(GLfloat x, GLfloat y, GLfloat z);
V4 gl_V4_New(GLfloat x, GLfloat y, GLfloat z, GLfloat w);
@@ -49,5 +52,14 @@
int gl_Matrix_Inv(GLfloat* r, GLfloat* m, GLint n);
+
+/*
+ * Approximate 1/sqrt(x).
+ * The float's bits are reinterpreted as an integer, the 0x5f3759df constant
+ * produces a first guess, and one Newton-Raphson step refines it.
+ * x is expected to be positive. Callers must reject x == 0 themselves:
+ * assuming 32-bit IEEE-754 floats the result is then large but finite,
+ * not infinity and not 0.
+ */
+static inline GLfloat fastInvSqrt(float x){
+    GLint i; GLfloat y;
+    /* memcpy reinterprets the bits without violating strict aliasing */
+    memcpy(&i, &x, sizeof x);
+    i = 0x5f3759df - (i>>1); /* initial guess */
+    memcpy(&y, &i, sizeof y);
+    return y * (1.5F - 0.5F * x * y * y); /* one Newton-Raphson iteration */
+}
#endif
// __ZMATH__
--- a/src/ztriangle.c
+++ b/src/ztriangle.c
@@ -56,7 +56,7 @@
#define PUT_PIXEL(_a) \
{ \
- zz = z >> ZB_POINT_Z_FRAC_BITS; \
+ register GLuint zz =z >> ZB_POINT_Z_FRAC_BITS; \
if (ZCMPSIMP(zz, pz[_a], _a, color)) { \
TGL_BLEND_FUNC(color, (pp[_a])) /*pp[_a] = color;*/ \
if(zbdw)pz[_a] = zz; \
@@ -82,7 +82,7 @@
#define PUT_PIXEL(_a) \
{ \
- zz = z >> ZB_POINT_Z_FRAC_BITS; \
+ register GLuint zz =z >> ZB_POINT_Z_FRAC_BITS; \
if (ZCMPSIMP(zz, pz[_a], _a, 0)) { \
pp[_a] = color; \
if(zbdw)pz[_a] = zz; \
@@ -118,7 +118,7 @@
#if TGL_FEATURE_NO_DRAW_COLOR != 1
#define PUT_PIXEL(_a) \
{ \
- zz = z >> ZB_POINT_Z_FRAC_BITS; \
+ register GLuint zz =z >> ZB_POINT_Z_FRAC_BITS; \
if (ZCMPSIMP(zz, pz[_a], _a, 0)) { \
/*pp[_a] = RGB_TO_PIXEL(or1, og1, ob1);*/ \
TGL_BLEND_FUNC_RGB(or1, og1, ob1, (pp[_a])); \
@@ -132,7 +132,7 @@
#else
#define PUT_PIXEL(_a) \
{ \
- zz = z >> ZB_POINT_Z_FRAC_BITS; \
+ register GLuint zz =z >> ZB_POINT_Z_FRAC_BITS; \
/*c = RGB_TO_PIXEL(or1, og1, ob1);*/ \
if (ZCMPSIMP(zz, pz[_a], _a, 0)) { \
/*pp[_a] = c;*/ \
@@ -151,25 +151,12 @@
#define DRAW_INIT() \
{}
- /*
- #define PUT_PIXEL(_a) \
- { \
- zz=z >> ZB_POINT_Z_FRAC_BITS; \
- if (ZCMP(zz,pz[_a],_a)) { \
- pp[_a] = RGB_TO_PIXEL(or1, og1, ob1);\
- pz[_a]=zz; \
- }\
- z+=dzdx; \
- og1+=dgdx; \
- or1+=drdx; \
- ob1+=dbdx; \
- }
- */
+
#if TGL_FEATURE_NO_DRAW_COLOR != 1
#define PUT_PIXEL(_a) \
{ \
- zz = z >> ZB_POINT_Z_FRAC_BITS; \
+ register GLuint zz =z >> ZB_POINT_Z_FRAC_BITS; \
if (ZCMPSIMP(zz, pz[_a], _a, 0)) { \
/*pp[_a] = RGB_TO_PIXEL(or1, og1, ob1);*/ \
TGL_BLEND_FUNC_RGB(or1, og1, ob1, (pp[_a])); \
@@ -184,7 +171,7 @@
#else
#define PUT_PIXEL(_a) \
{ \
- zz = z >> ZB_POINT_Z_FRAC_BITS; \
+ register GLuint zz =z >> ZB_POINT_Z_FRAC_BITS; \
/*c = RGB_TO_PIXEL(or1, og1, ob1);*/ \
if (ZCMPSIMP(zz, pz[_a], _a, c)) { \
/*pp[_a] = c;*/ \
@@ -226,7 +213,7 @@
#if TGL_FEATURE_NO_DRAW_COLOR != 1
#define PUT_PIXEL(_a) \
{ \
- zz = z >> ZB_POINT_Z_FRAC_BITS; \
+ register GLuint zz =z >> ZB_POINT_Z_FRAC_BITS; \
if (ZCMPSIMP(zz, pz[_a], _a, 0)) { \
pp[_a] = RGB_TO_PIXEL(or1, og1, ob1); \
if(zbdw)pz[_a] = zz; \
@@ -239,7 +226,7 @@
#else
#define PUT_PIXEL(_a) \
{ \
- zz = z >> ZB_POINT_Z_FRAC_BITS; \
+ register GLuint zz =z >> ZB_POINT_Z_FRAC_BITS; \
/*c = RGB_TO_PIXEL(or1, og1, ob1);*/ \
if (ZCMPSIMP(zz, pz[_a], _a, 0)) { \
pp[_a] = RGB_TO_PIXEL(or1,og1,ob1); \
@@ -275,7 +262,7 @@
#if TGL_FEATURE_NO_DRAW_COLOR != 1
#define PUT_PIXEL(_a) \
{ \
- zz = z >> ZB_POINT_Z_FRAC_BITS; \
+ register GLuint zz =z >> ZB_POINT_Z_FRAC_BITS; \
if (ZCMPSIMP(zz, pz[_a], _a, 0)) { \
pp[_a] = RGB_TO_PIXEL(or1, og1, ob1); \
\
@@ -289,7 +276,7 @@
#else
#define PUT_PIXEL(_a) \
{ \
- zz = z >> ZB_POINT_Z_FRAC_BITS; \
+ register GLuint zz =z >> ZB_POINT_Z_FRAC_BITS; \
/*c = RGB_TO_PIXEL(or1, og1, ob1);*/ \
if (ZCMPSIMP(zz, pz[_a], _a, c)) { \
/*pp[_a] = c;*/ \
@@ -342,7 +329,7 @@
//#if TGL_FEATURE_NO_DRAW_COLOR != 1
//#define PUT_PIXEL(_a) \
{ \
- zz = z >> ZB_POINT_Z_FRAC_BITS; \
+ register GLuint zz =z >> ZB_POINT_Z_FRAC_BITS; \
if (ZCMP(zz, pz[_a], _a, 0)) { \
pp[_a] = texture[((t & 0x3FC00000) | s) >> 14]; \
pz[_a] = zz; \
@@ -354,7 +341,7 @@
//#else
//#define PUT_PIXEL(_a) \
{ \
- zz = z >> ZB_POINT_Z_FRAC_BITS; \
+ register GLuint zz =z >> ZB_POINT_Z_FRAC_BITS; \
c = texture[((t & 0x3FC00000) | s) >> 14]; \
if (ZCMP(zz, pz[_a], _a, c)) { \
pp[_a] = c; \
@@ -376,6 +363,68 @@
#if 1 // IF 1
+#define DRAW_LINE_TRI_TEXTURED() /* Scanline body shared by the DRAW_LINE() definitions of the textured triangle fillers in this file. */ \
+ { /* Expands against names supplied at the use site: PUT_PIXEL, OR1OG1OB1DECL, NB_INTERP, x1, x2, z1, sz1, tz1, pp1, pz1 and the d*dx gradients. */ \
+ register GLushort* pz; \
+ register PIXEL* pp; \
+ register GLuint s, t, z; \
+ register GLint n; \
+ OR1OG1OB1DECL \
+ GLfloat sz, tz, fz, zinv; \
+ n = (x2 >> 16) - x1; /* the span holds n + 1 pixels: the tail loop below runs while n >= 0 */ \
+ fz = (GLfloat)z1; \
+ zinv = 1.0 / fz; /* one reciprocal per batch of NB_INTERP pixels rather than one per pixel */ \
+ pp = (PIXEL*)((GLbyte*)pp1 + x1 * PSZB); \
+ pz = pz1 + x1; \
+ z = z1; \
+ sz = sz1; \
+ tz = tz1; \
+ while (n >= (NB_INTERP - 1)) { /* full batches; the eight PUT_PIXEL calls below hard-code NB_INTERP == 8 */ \
+ register GLint dsdx, dtdx; /* scoped to the batch; presumably read by PUT_PIXEL (its s/t stepping is not visible here) */ \
+ { \
+ GLfloat ss, tt; \
+ ss = (sz * zinv); /* recover s from the interpolated s*z */ \
+ tt = (tz * zinv); /* recover t from the interpolated t*z */ \
+ s = (GLint)ss; \
+ t = (GLint)tt; \
+ dsdx = (GLint)((dszdx - ss * fdzdx) * zinv); \
+ dtdx = (GLint)((dtzdx - tt * fdzdx) * zinv); \
+ } \
+ fz += fndzdx; /* fndzdx: presumably NB_INTERP * fdzdx, set up by DRAW_INIT - confirm */ \
+ zinv = 1.0 / fz; /* reciprocal for the next batch; the last value is reused by the tail below */ \
+ PUT_PIXEL(0); /*the_x++;*/ \
+ PUT_PIXEL(1); /*the_x++;*/ \
+ PUT_PIXEL(2); /*the_x++;*/ \
+ PUT_PIXEL(3); /*the_x++;*/ \
+ PUT_PIXEL(4); /*the_x++;*/ \
+ PUT_PIXEL(5); /*the_x++;*/ \
+ PUT_PIXEL(6); /*the_x++;*/ \
+ PUT_PIXEL(7); /*the_x-=7;*/ \
+ pz += NB_INTERP; \
+ pp = (PIXEL*)((GLbyte*)pp + NB_INTERP * PSZB); /*the_x+=NB_INTERP * PSZB;*/ \
+ n -= NB_INTERP; \
+ sz += ndszdx; \
+ tz += ndtzdx; \
+ } \
+ { register GLint dsdx, dtdx; /* tail: fewer than NB_INTERP pixels remain, drawn one at a time */ \
+ { \
+ GLfloat ss, tt; \
+ ss = (sz * zinv); \
+ tt = (tz * zinv); \
+ s = (GLint)ss; \
+ t = (GLint)tt; \
+ dsdx = (GLint)((dszdx - ss * fdzdx) * zinv); \
+ dtdx = (GLint)((dtzdx - tt * fdzdx) * zinv); \
+ } \
+ while (n >= 0) { \
+ PUT_PIXEL(0); \
+ pz += 1; \
+ pp = (PIXEL*)((GLbyte*)pp + PSZB); \
+ n -= 1; \
+ } \
+ } \
+ } //EOF draw line
+
void ZB_fillTriangleMappingPerspective(ZBuffer* zb, ZBufferPoint* p0, ZBufferPoint* p1, ZBufferPoint* p2) {
PIXEL* texture;
GLfloat fdzdx, fndzdx, ndszdx, ndtzdx;
@@ -387,8 +436,8 @@
#endif
#define INTERP_Z
#define INTERP_STZ
-#define INTERP_RGB
+
#define NB_INTERP 8
#define DRAW_INIT() \
@@ -400,12 +449,13 @@
ndtzdx = NB_INTERP * dtzdx; \
}
#if TGL_FEATURE_LIT_TEXTURES == 1
+#define INTERP_RGB
#define OR1OG1OB1DECL \
register GLuint or1, og1, ob1; \
or1 = r1; \
og1 = g1; \
ob1 = b1;
-#define OR1G1B1INCR \
+#define OR1G1B1INCR \
og1 += dgdx; \
or1 += drdx; \
ob1 += dbdx;
@@ -415,11 +465,12 @@
#define or1 0xffff
#define og1 0xffff
#define ob1 0xffff
+#undef INTERP_RGB
#endif
#if TGL_FEATURE_NO_DRAW_COLOR != 1
#define PUT_PIXEL(_a) \
{ \
- zz = z >> ZB_POINT_Z_FRAC_BITS; \
+ register GLuint zz =z >> ZB_POINT_Z_FRAC_BITS; \
if (ZCMP(zz, pz[_a], _a, 0)) { \
/*pp[_a] = RGB_MIX_FUNC(or1, og1, ob1, *(PIXEL*)((GLbyte*)texture + (((t & 0x3FC00000) | (s & 0x003FC000)) >> (17 - PSZSH))));*/ \
TGL_BLEND_FUNC(RGB_MIX_FUNC(or1, og1, ob1, *(PIXEL*)((GLbyte*)texture + (((t & 0x3FC00000) | (s & 0x003FC000)) >> (17 - PSZSH)))), pp[_a]) \
@@ -433,7 +484,7 @@
#else
#define PUT_PIXEL(_a) \
{ \
- zz = z >> ZB_POINT_Z_FRAC_BITS; \
+ register GLuint zz =z >> ZB_POINT_Z_FRAC_BITS; \
c = *(PIXEL*)((GLbyte*)texture + (((t & 0x3FC00000) | (s & 0x003FC000)) >> (17 - PSZSH))); \
if (ZCMP(zz, pz[_a], _a, c)) { \
/*pp[_a] = RGB_MIX_FUNC(or1, og1, ob1, c);*/ \
@@ -446,65 +497,8 @@
OR1G1B1INCR \
}
#endif
+#define DRAW_LINE() {DRAW_LINE_TRI_TEXTURED()}
-#define DRAW_LINE() \
- { \
- register GLushort* pz; \
- register PIXEL* pp; \
- register GLuint s, t, z, zz; \
- register GLint n, dsdx, dtdx; \
- OR1OG1OB1DECL \
- GLfloat sz, tz, fz, zinv; \
- n = (x2 >> 16) - x1; \
- fz = (GLfloat)z1; \
- zinv = 1.0 / fz; \
- pp = (PIXEL*)((GLbyte*)pp1 + x1 * PSZB); \
- pz = pz1 + x1; \
- z = z1; \
- sz = sz1; \
- tz = tz1; \
- while (n >= (NB_INTERP - 1)) { \
- { \
- GLfloat ss, tt; \
- ss = (sz * zinv); \
- tt = (tz * zinv); \
- s = (GLint)ss; \
- t = (GLint)tt; \
- dsdx = (GLint)((dszdx - ss * fdzdx) * zinv); \
- dtdx = (GLint)((dtzdx - tt * fdzdx) * zinv); \
- fz += fndzdx; \
- zinv = 1.0 / fz; \
- } \
- PUT_PIXEL(0); /*the_x++;*/ \
- PUT_PIXEL(1); /*the_x++;*/ \
- PUT_PIXEL(2); /*the_x++;*/ \
- PUT_PIXEL(3); /*the_x++;*/ \
- PUT_PIXEL(4); /*the_x++;*/ \
- PUT_PIXEL(5); /*the_x++;*/ \
- PUT_PIXEL(6); /*the_x++;*/ \
- PUT_PIXEL(7); /*the_x-=7;*/ \
- pz += NB_INTERP; \
- pp = (PIXEL*)((GLbyte*)pp + NB_INTERP * PSZB); /*the_x+=NB_INTERP * PSZB;*/ \
- n -= NB_INTERP; \
- sz += ndszdx; \
- tz += ndtzdx; \
- } \
- { \
- GLfloat ss, tt; \
- ss = (sz * zinv); \
- tt = (tz * zinv); \
- s = (GLint)ss; \
- t = (GLint)tt; \
- dsdx = (GLint)((dszdx - ss * fdzdx) * zinv); \
- dtdx = (GLint)((dtzdx - tt * fdzdx) * zinv); \
- } \
- while (n >= 0) { \
- PUT_PIXEL(0); /*the_x += PSZB;*/ \
- pz += 1; \
- pp = (PIXEL*)((GLbyte*)pp + PSZB); \
- n -= 1; \
- } \
- }
#include "ztriangle.h"
}
@@ -557,7 +551,7 @@
#if TGL_FEATURE_NO_DRAW_COLOR != 1
#define PUT_PIXEL(_a) \
{ \
- zz = z >> ZB_POINT_Z_FRAC_BITS; \
+ register GLuint zz =z >> ZB_POINT_Z_FRAC_BITS; \
if (ZCMP(zz, pz[_a], _a, 0)) { \
pp[_a] = RGB_MIX_FUNC(or1, og1, ob1, *(PIXEL*)((GLbyte*)texture + (((t & 0x3FC00000) | (s & 0x003FC000)) >> (17 - PSZSH)))); \
if(zbdw) pz[_a] = zz; \
@@ -570,7 +564,7 @@
#else
#define PUT_PIXEL(_a) \
{ \
- zz = z >> ZB_POINT_Z_FRAC_BITS; \
+ register GLuint zz =z >> ZB_POINT_Z_FRAC_BITS; \
c = *(PIXEL*)((GLbyte*)texture + (((t & 0x3FC00000) | (s & 0x003FC000)) >> (17 - PSZSH))); \
if (ZCMP(zz, pz[_a], _a, c)) { \
pp[_a] = RGB_MIX_FUNC(or1, og1, ob1, c); \
@@ -583,74 +577,13 @@
OR1G1B1INCR \
}
#endif
+#define DRAW_LINE() {DRAW_LINE_TRI_TEXTURED()}
-#define DRAW_LINE() \
- { \
- register GLushort* pz; \
- register PIXEL* pp; \
- register GLuint s, t, z, zz; \
- register GLint n, dsdx, dtdx; \
- OR1OG1OB1DECL \
- GLfloat sz, tz, fz, zinv; \
- n = (x2 >> 16) - x1; \
- fz = (GLfloat)z1; \
- zinv = 1.0 / fz; \
- pp = (PIXEL*)((GLbyte*)pp1 + x1 * PSZB); \
- pz = pz1 + x1; \
- z = z1; \
- sz = sz1; \
- tz = tz1; \
- while (n >= (NB_INTERP - 1)) { \
- { \
- GLfloat ss, tt; \
- ss = (sz * zinv); \
- tt = (tz * zinv); \
- s = (GLint)ss; \
- t = (GLint)tt; \
- dsdx = (GLint)((dszdx - ss * fdzdx) * zinv); \
- dtdx = (GLint)((dtzdx - tt * fdzdx) * zinv); \
- fz += fndzdx; \
- zinv = 1.0 / fz; \
- } \
- PUT_PIXEL(0); /*the_x++;*/ \
- PUT_PIXEL(1); /*the_x++;*/ \
- PUT_PIXEL(2); /*the_x++;*/ \
- PUT_PIXEL(3); /*the_x++;*/ \
- PUT_PIXEL(4); /*the_x++;*/ \
- PUT_PIXEL(5); /*the_x++;*/ \
- PUT_PIXEL(6); /*the_x++;*/ \
- PUT_PIXEL(7); /*the_x-=7;*/ \
- pz += NB_INTERP; \
- pp = (PIXEL*)((GLbyte*)pp + NB_INTERP * PSZB); /*the_x+=NB_INTERP * PSZB;*/ \
- n -= NB_INTERP; \
- sz += ndszdx; \
- tz += ndtzdx; \
- } \
- { \
- GLfloat ss, tt; \
- ss = (sz * zinv); \
- tt = (tz * zinv); \
- s = (GLint)ss; \
- t = (GLint)tt; \
- dsdx = (GLint)((dszdx - ss * fdzdx) * zinv); \
- dtdx = (GLint)((dtzdx - tt * fdzdx) * zinv); \
- } \
- while (n >= 0) { \
- PUT_PIXEL(0); /*the_x += PSZB;*/ \
- pz += 1; \
- pp = (PIXEL*)((GLbyte*)pp + PSZB); \
- n -= 1; \
- } \
- }
#include "ztriangle.h"
}
-
-
-
#endif // if 1
-// Not maintained by Gek
--- a/src/ztriangle.h
+++ b/src/ztriangle.h
@@ -1,15 +1,31 @@
/*
- * We draw a triangle with various GLinterpolations
+ * A complicated, delicate, hand-tuned triangle rasterizer.
+ * This is intricate code; read it carefully before changing anything.
+ * The lifetime of variables has been tightly optimized, which is why there are so many seemingly arbitrary curly braces everywhere.
+ * Yes, it is necessary to do that. This code is extremely delicate,
+ * and even a minor mistake can severely reduce the framerate.
+
+Before committing any changes, run gears, model, and texture on your changed code to make sure you did not
+break anything!
+
+Things to keep in mind:
+ 1) Tight control of the lifetimes of variables lets us use registers more often and memory less
+ 2) Doing the same operation on multiple items is faster than doing different things on different items, generally, because
+ they will be able to take advantage of any/all applicable SIMD/vector ops on your hardware.
+ 3) Divide operations are vastly more expensive than add/sub/bitwise/etc
+ 4) Bit shifting is your friend, it's the fast way to multiply or divide by 2.
+ 5) Fixed point math is used for the depth "z" buffer
+ 6) We're not just using floats for everything because this is still supposed to be fast on platforms without SSE2
+ 7)
*/
{
- ZBufferPoint *pr1, *pr2, *l1, *l2;
- GLfloat fdx1, fdx2, fdy1, fdy2, fz, d1, d2;
+ GLfloat fdx1, fdx2, fdy1, fdy2;
GLushort* pz1;
PIXEL* pp1;
- GLint part, update_left, update_right;
+ GLint update_left, update_right;
- GLint nb_lines, dx1, dy1, tmp, dx2, dy2;
+ GLint nb_lines, dx1, dy1, dx2, dy2;
#if TGL_FEATURE_POLYGON_STIPPLE == 1
GLushort the_y;
#endif
@@ -36,98 +52,105 @@
#endif
/* we sort the vertex with increasing y */
- {
- ZBufferPoint *t;
if (p1->y < p0->y) {
- t = p0;
+ ZBufferPoint *t = p0;
p0 = p1;
p1 = t;
}
if (p2->y < p0->y) {
- t = p2;
+ ZBufferPoint *t = p2;
p2 = p1;
p1 = p0;
p0 = t;
} else if (p2->y < p1->y) {
- t = p1;
+ ZBufferPoint *t = p1;
p1 = p2;
p2 = t;
}
- }
+
/* we compute dXdx and dXdy for all GLinterpolated values */
+ fdx1 = p1->x - p0->x;//fdx1 first usage (VALUE_FDX1_USED)
+ fdy1 = p1->y - p0->y;//fdy1 first usage (VALUE_FDY1_USED)
- fdx1 = p1->x - p0->x;
- fdy1 = p1->y - p0->y;
-
fdx2 = p2->x - p0->x;
fdy2 = p2->y - p0->y;
-
- fz = fdx1 * fdy2 - fdx2 * fdy1;
+
+ GLfloat fz = fdx1 * fdy2 - fdx2 * fdy1;//fz first usage
if (fz == 0)
return;
- fz = 1.0 / fz;
-
+ fz = 1.0 / fz; //value of fz is used (VALUE_FZ_USED)
+ //for these (VALUE_FZ_USED)
fdx1 *= fz;
fdy1 *= fz;
fdx2 *= fz;
fdy2 *= fz;
-
+ //and then
#ifdef INTERP_Z
- d1 = p1->z - p0->z;
- d2 = p2->z - p0->z;
+{
+ GLfloat d1 = p1->z - p0->z; //d1 first usage
+ GLfloat d2 = p2->z - p0->z;
dzdx = (GLint)(fdy2 * d1 - fdy1 * d2);
dzdy = (GLint)(fdx1 * d2 - fdx2 * d1);
+}
#endif
#ifdef INTERP_RGB
+{GLfloat d1, d2;
d1 = p1->r - p0->r;
d2 = p2->r - p0->r;
drdx = (GLint)(fdy2 * d1 - fdy1 * d2);
drdy = (GLint)(fdx1 * d2 - fdx2 * d1);
-
+}
+{GLfloat d1, d2;
d1 = p1->g - p0->g;
d2 = p2->g - p0->g;
dgdx = (GLint)(fdy2 * d1 - fdy1 * d2);
dgdy = (GLint)(fdx1 * d2 - fdx2 * d1);
-
+}
+{GLfloat d1, d2;
d1 = p1->b - p0->b;
d2 = p2->b - p0->b;
dbdx = (GLint)(fdy2 * d1 - fdy1 * d2);
dbdy = (GLint)(fdx1 * d2 - fdx2 * d1);
-
+}
#endif
#ifdef INTERP_ST
+{GLfloat d1, d2;
d1 = p1->s - p0->s;
d2 = p2->s - p0->s;
dsdx = (GLint)(fdy2 * d1 - fdy1 * d2);
dsdy = (GLint)(fdx1 * d2 - fdx2 * d1);
-
+}
+{GLfloat d1, d2;
d1 = p1->t - p0->t;
d2 = p2->t - p0->t;
dtdx = (GLint)(fdy2 * d1 - fdy1 * d2);
dtdy = (GLint)(fdx1 * d2 - fdx2 * d1);
+}
#endif
#ifdef INTERP_STZ
{
- GLfloat zz;
- zz = (GLfloat)p0->z;
- p0->sz = (GLfloat)p0->s * zz;
- p0->tz = (GLfloat)p0->t * zz;
- zz = (GLfloat)p1->z;
- p1->sz = (GLfloat)p1->s * zz;
- p1->tz = (GLfloat)p1->t * zz;
- zz = (GLfloat)p2->z;
- p2->sz = (GLfloat)p2->s * zz;
- p2->tz = (GLfloat)p2->t * zz;
-
+ GLfloat zedzed;
+ zedzed = (GLfloat)p0->z;
+ p0->sz = (GLfloat)p0->s * zedzed;
+ p0->tz = (GLfloat)p0->t * zedzed;
+ zedzed = (GLfloat)p1->z;
+ p1->sz = (GLfloat)p1->s * zedzed;
+ p1->tz = (GLfloat)p1->t * zedzed;
+ zedzed = (GLfloat)p2->z;
+ p2->sz = (GLfloat)p2->s * zedzed;
+ p2->tz = (GLfloat)p2->t * zedzed;
+ }
+ {GLfloat d1, d2;
d1 = p1->sz - p0->sz;
d2 = p2->sz - p0->sz;
dszdx = (fdy2 * d1 - fdy1 * d2);
dszdy = (fdx1 * d2 - fdx2 * d1);
-
+ }
+ {GLfloat d1, d2;
d1 = p1->tz - p0->tz;
d2 = p2->tz - p0->tz;
dtzdx = (fdy2 * d1 - fdy1 * d2);
@@ -137,7 +160,7 @@
/* screen coordinates */
- pp1 = (PIXEL*)((GLbyte*)zb->pbuf + zb->linesize * p0->y);
+ pp1 = (PIXEL*)(zb->pbuf) + zb->xsize * p0->y; //pp1 first usage
#if TGL_FEATURE_POLYGON_STIPPLE == 1
the_y = p0->y;
#endif
@@ -144,106 +167,105 @@
pz1 = zb->zbuf + p0->y * zb->xsize;
DRAW_INIT();
+//part used here and down.
+ for (GLint part = 0; part < 2; part++) {
+ {ZBufferPoint *pr1, *pr2, *l1, *l2; //BEGINNING OF LIFETIME FOR ZBUFFERPOINT VARS!!!
+ if (part == 0) {
+ if (fz > 0) { //Here! (VALUE_FZ_USED)
+ update_left = 1;
+ update_right = 1;
+ l1 = p0; //MARK l1 first usage
+ l2 = p2; //MARK l2 first usage
+ pr1 = p0; //MARK first usage of pr1
+ pr2 = p1; //MARK first usage pf pr2
+ } else {
+ update_left = 1;
+ update_right = 1;
+ l1 = p0;
+ l2 = p1;
+ pr1 = p0;
+ pr2 = p2;
+ }
+ nb_lines = p1->y - p0->y;
+ } else { //SECOND PART~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ /* second part */
+ if (fz > 0) { //fz last usage (VALUE_FZ_USED)
+ update_left = 0;
+ update_right = 1;
+ pr1 = p1;
+ pr2 = p2;
+ } else {
+ update_left = 1;
+ update_right = 0;
+ l1 = p1;
+ l2 = p2;
+ }
+ nb_lines = p2->y - p1->y + 1;
+ } //EOF SECOND PART
- for (part = 0; part < 2; part++) {
- if (part == 0) {
- if (fz > 0) {
- update_left = 1;
- update_right = 1;
- l1 = p0;
- l2 = p2;
- pr1 = p0;
- pr2 = p1;
- } else {
- update_left = 1;
- update_right = 1;
- l1 = p0;
- l2 = p1;
- pr1 = p0;
- pr2 = p2;
- }
- nb_lines = p1->y - p0->y;
- } else {
- /* second part */
- if (fz > 0) {
- update_left = 0;
- update_right = 1;
- pr1 = p1;
- pr2 = p2;
- } else {
- update_left = 1;
- update_right = 0;
- l1 = p1;
- l2 = p2;
- }
- nb_lines = p2->y - p1->y + 1;
- }
-
- /* compute the values for the left edge */
-
- if (update_left) {
- dy1 = l2->y - l1->y;
- dx1 = l2->x - l1->x;
- if (dy1 > 0)
- tmp = (dx1 << 16) / dy1;
- else
- tmp = 0;
- x1 = l1->x;
- error = 0;
- derror = tmp & 0x0000ffff;
- dxdy_min = tmp >> 16;
- dxdy_max = dxdy_min + 1;
-
+ /* compute the values for the left edge */
+ //pr1 and pr2 are not used inside this area.
+ if (update_left) {
+ {
+ register GLint tmp;
+ dy1 = l2->y - l1->y;
+ dx1 = l2->x - l1->x;
+ if (dy1 > 0)
+ tmp = (dx1 << 16) / dy1;
+ else
+ tmp = 0;
+ x1 = l1->x;
+ error = 0;
+ derror = tmp & 0x0000ffff;
+ dxdy_min = tmp >> 16;
+ }
+ dxdy_max = dxdy_min + 1;
#ifdef INTERP_Z
- z1 = l1->z;
- dzdl_min = (dzdy + dzdx * dxdy_min);
- dzdl_max = dzdl_min + dzdx;
+ z1 = l1->z;
+ dzdl_min = (dzdy + dzdx * dxdy_min);
+ dzdl_max = dzdl_min + dzdx;
#endif
#ifdef INTERP_RGB
- r1 = l1->r;
- drdl_min = (drdy + drdx * dxdy_min);
- drdl_max = drdl_min + drdx;
-
- g1 = l1->g;
- dgdl_min = (dgdy + dgdx * dxdy_min);
- dgdl_max = dgdl_min + dgdx;
-
- b1 = l1->b;
- dbdl_min = (dbdy + dbdx * dxdy_min);
- dbdl_max = dbdl_min + dbdx;
+ r1 = l1->r;
+ drdl_min = (drdy + drdx * dxdy_min);
+ drdl_max = drdl_min + drdx;
+ g1 = l1->g;
+ dgdl_min = (dgdy + dgdx * dxdy_min);
+ dgdl_max = dgdl_min + dgdx;
+ b1 = l1->b;
+ dbdl_min = (dbdy + dbdx * dxdy_min);
+ dbdl_max = dbdl_min + dbdx;
#endif
#ifdef INTERP_ST
- s1 = l1->s;
- dsdl_min = (dsdy + dsdx * dxdy_min);
- dsdl_max = dsdl_min + dsdx;
-
- t1 = l1->t;
- dtdl_min = (dtdy + dtdx * dxdy_min);
- dtdl_max = dtdl_min + dtdx;
+ s1 = l1->s;
+ dsdl_min = (dsdy + dsdx * dxdy_min);
+ dsdl_max = dsdl_min + dsdx;
+ t1 = l1->t;
+ dtdl_min = (dtdy + dtdx * dxdy_min);
+ dtdl_max = dtdl_min + dtdx;
#endif
#ifdef INTERP_STZ
- sz1 = l1->sz;
- dszdl_min = (dszdy + dszdx * dxdy_min);
- dszdl_max = dszdl_min + dszdx;
-
- tz1 = l1->tz;
- dtzdl_min = (dtzdy + dtzdx * dxdy_min);
- dtzdl_max = dtzdl_min + dtzdx;
+ sz1 = l1->sz;
+ dszdl_min = (dszdy + dszdx * dxdy_min);
+ dszdl_max = dszdl_min + dszdx;
+ tz1 = l1->tz;
+ dtzdl_min = (dtzdy + dtzdx * dxdy_min);
+ dtzdl_max = dtzdl_min + dtzdx;
#endif
- }
+ } //EOF update left
+ //Is l1 used after update_left?
+ /* compute values for the right edge */
- /* compute values for the right edge */
-
- if (update_right) {
- dx2 = (pr2->x - pr1->x);
- dy2 = (pr2->y - pr1->y);
- if (dy2 > 0)
- dx2dy2 = (dx2 << 16) / dy2;
- else
- dx2dy2 = 0;
- x2 = pr1->x << 16;
- }
-
+ if (update_right) {
+ dx2 = (pr2->x - pr1->x);
+ dy2 = (pr2->y - pr1->y); //LAST USAGE OF PR2
+ if (dy2 > 0)
+ dx2dy2 = (dx2 << 16) / dy2;
+ else
+ dx2dy2 = 0;
+ x2 = pr1->x << 16; //LAST USAGE OF PR1
+ } //EOF update right
+ } //End of lifetime for ZBufferpoints
/* we draw all the scan line of the part */
while (nb_lines > 0) {
@@ -255,7 +277,7 @@
register GLint n;
#ifdef INTERP_Z
register GLushort* pz;
- register GLuint z, zz;
+ register GLuint z;
#endif
#ifdef INTERP_RGB
register GLuint or1, og1, ob1;
@@ -264,11 +286,12 @@
register GLuint s, t;
#endif
#ifdef INTERP_STZ
- GLfloat sz, tz;
+ //GLfloat sz, tz; //These variables go unused in this draw line function.
#endif
n = (x2 >> 16) - x1;
- pp = (PIXEL*)((GLbyte*)pp1 + x1 * PSZB);
+ //pp = (PIXEL*)((GLbyte*)pp1 + x1 * PSZB);
+ pp = (PIXEL*)pp1 + x1;
#ifdef INTERP_Z
pz = pz1 + x1;
z = z1;
@@ -283,8 +306,8 @@
t = t1;
#endif
#ifdef INTERP_STZ
- sz = sz1;
- tz = tz1;
+// sz = sz1; //What is SZ used for?
+// tz = tz1; //What is TZ used for?
#endif
while (n >= 3) {
PUT_PIXEL(0); /*the_x++;*/
@@ -294,7 +317,8 @@
#ifdef INTERP_Z
pz += 4;
#endif
- pp = (PIXEL*)((GLbyte*)pp + 4 * PSZB);
+// pp = (PIXEL*)((GLbyte*)pp + 4 * PSZB);
+ pp += 4;
n -= 4;
}
while (n >= 0) {
@@ -303,12 +327,11 @@
pz += 1;
#endif
pp = (PIXEL*)((GLbyte*)pp + PSZB);
- n -= 1;
+ n--;
}
}
- // the_y++;
#else
- DRAW_LINE(); // the_y++;
+ DRAW_LINE();
#endif
/* left edge */
@@ -359,8 +382,6 @@
pp1 = (PIXEL*)((GLbyte*)pp1 + zb->linesize);
#if TGL_FEATURE_POLYGON_STIPPLE == 1
the_y++;
-#else
-//#error POLYGONSTIPPLE_TESTING
#endif
pz1 += zb->xsize;
}