ref: acb9df2f3366456315a3bdb28b4903705ec3495d
parent: 8207a583af54b780757a278914e1deae8f2d1040
author: David <gek@katherine>
date: Fri Feb 19 12:07:00 EST 2021
Improvements
--- a/src/zbuffer.c
+++ b/src/zbuffer.c
@@ -20,10 +20,11 @@
if (zb == NULL)
return NULL;
- zb->xsize = xsize;
+ zb->xsize = xsize & ~3; //The xsize will ALWAYS be a multiple of four!
zb->ysize = ysize;
zb->mode = mode;
- zb->linesize = (xsize * PSZB + 3) & ~3;
+ //zb->linesize = (xsize * PSZB + 3) & ~3;
+ zb->linesize = (zb->xsize * PSZB); //use the masked width: the rasterizer steps rows by zb->xsize pixels, so linesize must equal zb->xsize * PSZB
switch (mode) {
#if TGL_FEATURE_8_BITS ==1
@@ -94,7 +95,7 @@
zb->xsize = xsize;
zb->ysize = ysize;
- zb->linesize = (xsize * PSZB + 3) & ~3;
+ zb->linesize = (xsize * PSZB);
size = zb->xsize * zb->ysize * sizeof(GLushort);
--- a/src/ztriangle.c
+++ b/src/ztriangle.c
@@ -401,7 +401,8 @@
PUT_PIXEL(6); /*the_x++;*/ \
PUT_PIXEL(7); /*the_x-=7;*/ \
pz += NB_INTERP; \
- pp = (PIXEL*)((GLbyte*)pp + NB_INTERP * PSZB); /*the_x+=NB_INTERP * PSZB;*/ \
+ /*pp = (PIXEL*)((GLbyte*)pp + NB_INTERP * PSZB);*/ /*the_x+=NB_INTERP * PSZB;*/ \
+ pp += NB_INTERP; /*the_x+=NB_INTERP * PSZB;*/ \
n -= NB_INTERP; \
sz += ndszdx; \
tz += ndtzdx; \
@@ -419,7 +420,8 @@
while (n >= 0) { \
PUT_PIXEL(0); \
pz += 1; \
- pp = (PIXEL*)((GLbyte*)pp + PSZB); \
+ /*pp = (PIXEL*)((GLbyte*)pp + PSZB);*/ \
+ pp ++; \
n -= 1; \
} \
} \
--- a/src/ztriangle.h
+++ b/src/ztriangle.h
@@ -20,7 +20,7 @@
*/
{
- GLfloat fdx1, fdx2, fdy1, fdy2;
+ GLfloat fdx1, fdx2, fdy1, fdy2, d1, d2;
GLushort* pz1;
PIXEL* pp1;
GLint update_left, update_right;
@@ -88,8 +88,8 @@
//and then
#ifdef INTERP_Z
{
- GLfloat d1 = p1->z - p0->z; //d1 first usage
- GLfloat d2 = p2->z - p0->z;
+ d1 = p1->z - p0->z; //d1 first usage
+ d2 = p2->z - p0->z;
dzdx = (GLint)(fdy2 * d1 - fdy1 * d2);
dzdy = (GLint)(fdx1 * d2 - fdx2 * d1);
}
@@ -96,19 +96,19 @@
#endif
#ifdef INTERP_RGB
-{GLfloat d1, d2;
+{
d1 = p1->r - p0->r;
d2 = p2->r - p0->r;
drdx = (GLint)(fdy2 * d1 - fdy1 * d2);
drdy = (GLint)(fdx1 * d2 - fdx2 * d1);
}
-{GLfloat d1, d2;
+{
d1 = p1->g - p0->g;
d2 = p2->g - p0->g;
dgdx = (GLint)(fdy2 * d1 - fdy1 * d2);
dgdy = (GLint)(fdx1 * d2 - fdx2 * d1);
}
-{GLfloat d1, d2;
+{
d1 = p1->b - p0->b;
d2 = p2->b - p0->b;
dbdx = (GLint)(fdy2 * d1 - fdy1 * d2);
@@ -117,13 +117,13 @@
#endif
#ifdef INTERP_ST
-{GLfloat d1, d2;
+{
d1 = p1->s - p0->s;
d2 = p2->s - p0->s;
dsdx = (GLint)(fdy2 * d1 - fdy1 * d2);
dsdy = (GLint)(fdx1 * d2 - fdx2 * d1);
}
-{GLfloat d1, d2;
+{
d1 = p1->t - p0->t;
d2 = p2->t - p0->t;
dtdx = (GLint)(fdy2 * d1 - fdy1 * d2);
@@ -144,13 +144,13 @@
p2->sz = (GLfloat)p2->s * zedzed;
p2->tz = (GLfloat)p2->t * zedzed;
}
- {GLfloat d1, d2;
+ {
d1 = p1->sz - p0->sz;
d2 = p2->sz - p0->sz;
dszdx = (fdy2 * d1 - fdy1 * d2);
dszdy = (fdx1 * d2 - fdx2 * d1);
}
- {GLfloat d1, d2;
+ {
d1 = p1->tz - p0->tz;
d2 = p2->tz - p0->tz;
dtzdx = (fdy2 * d1 - fdy1 * d2);
@@ -290,7 +290,7 @@
#endif
n = (x2 >> 16) - x1;
- //pp = (PIXEL*)((GLbyte*)pp1 + x1 * PSZB);
+ //pp = (PIXEL*)((GLbyte*)pp1 + x1 * PS_ZB);
pp = (PIXEL*)pp1 + x1;
#ifdef INTERP_Z
pz = pz1 + x1;
@@ -317,7 +317,7 @@
#ifdef INTERP_Z
pz += 4;
#endif
-// pp = (PIXEL*)((GLbyte*)pp + 4 * PSZB);
+// pp = (PIXEL*)((GLbyte*)pp + 4 * PS_ZB);
pp += 4;
n -= 4;
}
@@ -324,9 +324,11 @@
while (n >= 0) {
PUT_PIXEL(0); /*the_x++;*/
#ifdef INTERP_Z
- pz += 1;
+ //pz += 1;
+ pz++;
#endif
- pp = (PIXEL*)((GLbyte*)pp + PSZB);
+ /*pp = (PIXEL*)((GLbyte*)pp + PS_ZB);*/
+ pp++;
n--;
}
}
@@ -379,7 +381,8 @@
x2 += dx2dy2;
/* screen coordinates */
- pp1 = (PIXEL*)((GLbyte*)pp1 + zb->linesize);
+ //pp1 = (PIXEL*)((GLbyte*)pp1 + zb->linesize);
+ pp1 += zb->xsize;
#if TGL_FEATURE_POLYGON_STIPPLE == 1
the_y++;
#endif
--- /dev/null
+++ b/src/ztriangle.h.backup
@@ -1,0 +1,398 @@
+/*
+ * An extremely complicated, delicate, hand-tuned triangle rasterizer.
+ * This is some of the most intricate code in the project; read it carefully before changing anything.
+ * Variable lifetimes have been aggressively minimized, which is why there are so many seemingly arbitrary curly braces.
+ * Yes, it is necessary to do that. This code is extremely delicate
+ * and even a minor mistake can tank the framerate.
+
+Before committing any changes, run gears, model, and texture on your changed code to make sure you didn't
+break anything!
+
+Things to keep in mind:
+ 1) Tight control of the lifetimes of variables lets us use registers more often and memory less
+ 2) Doing the same operation on multiple items is faster than doing different things on different items, generally, because
+ they will be able to take advantage of any/all applicable SIMD/vector ops on your hardware.
+ 3) Divide operations are vastly more expensive than add/sub/bitwise/etc
+ 4) Bit shifting is your friend, it's the fast way to multiply or divide by 2.
+ 5) Fixed point math is used for the depth "z" buffer
+ 6) We're not just using floats for everything because this is still supposed to be fast on platforms without SSE2
+ 7)
+ */
+
+{
+ GLfloat fdx1, fdx2, fdy1, fdy2;
+ GLushort* pz1;
+ PIXEL* pp1;
+ GLint update_left, update_right;
+
+ GLint nb_lines, dx1, dy1, dx2, dy2;
+#if TGL_FEATURE_POLYGON_STIPPLE == 1
+ GLushort the_y;
+#endif
+ GLint error, derror;
+ GLint x1, dxdy_min, dxdy_max;
+ /* warning: x2 is multiplied by 2^16 */
+ GLint x2, dx2dy2;
+
+#ifdef INTERP_Z
+ GLint z1, dzdx, dzdy, dzdl_min, dzdl_max;
+#endif
+#ifdef INTERP_RGB
+ GLint r1, drdx, drdy, drdl_min, drdl_max;
+ GLint g1, dgdx, dgdy, dgdl_min, dgdl_max;
+ GLint b1, dbdx, dbdy, dbdl_min, dbdl_max;
+#endif
+#ifdef INTERP_ST
+ GLint s1, dsdx, dsdy, dsdl_min, dsdl_max;
+ GLint t1, dtdx, dtdy, dtdl_min, dtdl_max;
+#endif
+#ifdef INTERP_STZ
+ GLfloat sz1, dszdx, dszdy, dszdl_min, dszdl_max;
+ GLfloat tz1, dtzdx, dtzdy, dtzdl_min, dtzdl_max;
+#endif
+
+ /* we sort the three vertices by increasing y */
+ if (p1->y < p0->y) {
+ ZBufferPoint *t = p0;
+ p0 = p1;
+ p1 = t;
+ }
+ if (p2->y < p0->y) {
+ ZBufferPoint *t = p2;
+ p2 = p1;
+ p1 = p0;
+ p0 = t;
+ } else if (p2->y < p1->y) {
+ ZBufferPoint *t = p1;
+ p1 = p2;
+ p2 = t;
+ }
+
+
+ /* we compute dXdx and dXdy for all interpolated values */
+ fdx1 = p1->x - p0->x;//fdx1 first usage (VALUE_FDX1_USED)
+ fdy1 = p1->y - p0->y;//fdy1 first usage (VALUE_FDY1_USED)
+
+ fdx2 = p2->x - p0->x;
+ fdy2 = p2->y - p0->y;
+
+ GLfloat fz = fdx1 * fdy2 - fdx2 * fdy1;//fz first usage
+ if (fz == 0)
+ return;
+ fz = 1.0 / fz; //value of fz is used (VALUE_FZ_USED)
+ //for these (VALUE_FZ_USED)
+ fdx1 *= fz;
+ fdy1 *= fz;
+ fdx2 *= fz;
+ fdy2 *= fz;
+ //and then
+#ifdef INTERP_Z
+{
+ GLfloat d1 = p1->z - p0->z; //d1 first usage
+ GLfloat d2 = p2->z - p0->z;
+ dzdx = (GLint)(fdy2 * d1 - fdy1 * d2);
+ dzdy = (GLint)(fdx1 * d2 - fdx2 * d1);
+}
+#endif
+
+#ifdef INTERP_RGB
+{GLfloat d1, d2;
+ d1 = p1->r - p0->r;
+ d2 = p2->r - p0->r;
+ drdx = (GLint)(fdy2 * d1 - fdy1 * d2);
+ drdy = (GLint)(fdx1 * d2 - fdx2 * d1);
+}
+{GLfloat d1, d2;
+ d1 = p1->g - p0->g;
+ d2 = p2->g - p0->g;
+ dgdx = (GLint)(fdy2 * d1 - fdy1 * d2);
+ dgdy = (GLint)(fdx1 * d2 - fdx2 * d1);
+}
+{GLfloat d1, d2;
+ d1 = p1->b - p0->b;
+ d2 = p2->b - p0->b;
+ dbdx = (GLint)(fdy2 * d1 - fdy1 * d2);
+ dbdy = (GLint)(fdx1 * d2 - fdx2 * d1);
+}
+#endif
+
+#ifdef INTERP_ST
+{GLfloat d1, d2;
+ d1 = p1->s - p0->s;
+ d2 = p2->s - p0->s;
+ dsdx = (GLint)(fdy2 * d1 - fdy1 * d2);
+ dsdy = (GLint)(fdx1 * d2 - fdx2 * d1);
+}
+{GLfloat d1, d2;
+ d1 = p1->t - p0->t;
+ d2 = p2->t - p0->t;
+ dtdx = (GLint)(fdy2 * d1 - fdy1 * d2);
+ dtdy = (GLint)(fdx1 * d2 - fdx2 * d1);
+}
+#endif
+
+#ifdef INTERP_STZ
+ {
+ GLfloat zedzed;
+ zedzed = (GLfloat)p0->z;
+ p0->sz = (GLfloat)p0->s * zedzed;
+ p0->tz = (GLfloat)p0->t * zedzed;
+ zedzed = (GLfloat)p1->z;
+ p1->sz = (GLfloat)p1->s * zedzed;
+ p1->tz = (GLfloat)p1->t * zedzed;
+ zedzed = (GLfloat)p2->z;
+ p2->sz = (GLfloat)p2->s * zedzed;
+ p2->tz = (GLfloat)p2->t * zedzed;
+ }
+ {GLfloat d1, d2;
+ d1 = p1->sz - p0->sz;
+ d2 = p2->sz - p0->sz;
+ dszdx = (fdy2 * d1 - fdy1 * d2);
+ dszdy = (fdx1 * d2 - fdx2 * d1);
+ }
+ {GLfloat d1, d2;
+ d1 = p1->tz - p0->tz;
+ d2 = p2->tz - p0->tz;
+ dtzdx = (fdy2 * d1 - fdy1 * d2);
+ dtzdy = (fdx1 * d2 - fdx2 * d1);
+ }
+#endif
+
+ /* screen coordinates */
+
+ pp1 = (PIXEL*)(zb->pbuf) + zb->xsize * p0->y; //pp1 first usage
+#if TGL_FEATURE_POLYGON_STIPPLE == 1
+ the_y = p0->y;
+#endif
+ pz1 = zb->zbuf + p0->y * zb->xsize;
+
+ DRAW_INIT();
+//part used here and down.
+ for (GLint part = 0; part < 2; part++) {
+ {ZBufferPoint *pr1, *pr2, *l1, *l2; //BEGINNING OF LIFETIME FOR ZBUFFERPOINT VARS!!!
+ if (part == 0) {
+ if (fz > 0) { //Here! (VALUE_FZ_USED)
+ update_left = 1;
+ update_right = 1;
+ l1 = p0; //MARK l1 first usage
+ l2 = p2; //MARK l2 first usage
+ pr1 = p0; //MARK first usage of pr1
+ pr2 = p1; //MARK first usage pf pr2
+ } else {
+ update_left = 1;
+ update_right = 1;
+ l1 = p0;
+ l2 = p1;
+ pr1 = p0;
+ pr2 = p2;
+ }
+ nb_lines = p1->y - p0->y;
+ } else { //SECOND PART~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ /* second part */
+ if (fz > 0) { //fz last usage (VALUE_FZ_USED)
+ update_left = 0;
+ update_right = 1;
+ pr1 = p1;
+ pr2 = p2;
+ } else {
+ update_left = 1;
+ update_right = 0;
+ l1 = p1;
+ l2 = p2;
+ }
+ nb_lines = p2->y - p1->y + 1;
+ } //EOF SECOND PART
+
+ /* compute the values for the left edge */
+ //pr1 and pr2 are not used inside this area.
+ if (update_left) {
+ {
+ register GLint tmp;
+ dy1 = l2->y - l1->y;
+ dx1 = l2->x - l1->x;
+ if (dy1 > 0)
+ tmp = (dx1 << 16) / dy1;
+ else
+ tmp = 0;
+ x1 = l1->x;
+ error = 0;
+ derror = tmp & 0x0000ffff;
+ dxdy_min = tmp >> 16;
+ }
+ dxdy_max = dxdy_min + 1;
+#ifdef INTERP_Z
+ z1 = l1->z;
+ dzdl_min = (dzdy + dzdx * dxdy_min);
+ dzdl_max = dzdl_min + dzdx;
+#endif
+#ifdef INTERP_RGB
+ r1 = l1->r;
+ drdl_min = (drdy + drdx * dxdy_min);
+ drdl_max = drdl_min + drdx;
+ g1 = l1->g;
+ dgdl_min = (dgdy + dgdx * dxdy_min);
+ dgdl_max = dgdl_min + dgdx;
+ b1 = l1->b;
+ dbdl_min = (dbdy + dbdx * dxdy_min);
+ dbdl_max = dbdl_min + dbdx;
+#endif
+#ifdef INTERP_ST
+ s1 = l1->s;
+ dsdl_min = (dsdy + dsdx * dxdy_min);
+ dsdl_max = dsdl_min + dsdx;
+ t1 = l1->t;
+ dtdl_min = (dtdy + dtdx * dxdy_min);
+ dtdl_max = dtdl_min + dtdx;
+#endif
+#ifdef INTERP_STZ
+ sz1 = l1->sz;
+ dszdl_min = (dszdy + dszdx * dxdy_min);
+ dszdl_max = dszdl_min + dszdx;
+ tz1 = l1->tz;
+ dtzdl_min = (dtzdy + dtzdx * dxdy_min);
+ dtzdl_max = dtzdl_min + dtzdx;
+#endif
+ } //EOF update left
+ //Is l1 used after update_left?
+ /* compute values for the right edge */
+
+ if (update_right) {
+ dx2 = (pr2->x - pr1->x);
+ dy2 = (pr2->y - pr1->y); //LAST USAGE OF PR2
+ if (dy2 > 0)
+ dx2dy2 = (dx2 << 16) / dy2;
+ else
+ dx2dy2 = 0;
+ x2 = pr1->x << 16; //LAST USAGE OF PR1
+ } //EOF update right
+ } //End of lifetime for ZBufferpoints
+ /* we draw all the scan lines of the part */
+
+ while (nb_lines > 0) {
+ nb_lines--;
+#ifndef DRAW_LINE
+ /* generic draw line */
+ {
+ register PIXEL* pp;
+ register GLint n;
+#ifdef INTERP_Z
+ register GLushort* pz;
+ register GLuint z;
+#endif
+#ifdef INTERP_RGB
+ register GLuint or1, og1, ob1;
+#endif
+#ifdef INTERP_ST
+ register GLuint s, t;
+#endif
+#ifdef INTERP_STZ
+ //GLfloat sz, tz; //These variables go unused in this draw line function.
+#endif
+
+ n = (x2 >> 16) - x1;
+ //pp = (PIXEL*)((GLbyte*)pp1 + x1 * PSZB);
+ pp = (PIXEL*)pp1 + x1;
+#ifdef INTERP_Z
+ pz = pz1 + x1;
+ z = z1;
+#endif
+#ifdef INTERP_RGB
+ or1 = r1;
+ og1 = g1;
+ ob1 = b1;
+#endif
+#ifdef INTERP_ST
+ s = s1;
+ t = t1;
+#endif
+#ifdef INTERP_STZ
+// sz = sz1; //What is SZ used for?
+// tz = tz1; //What is TZ used for?
+#endif
+ while (n >= 3) {
+ PUT_PIXEL(0); /*the_x++;*/
+ PUT_PIXEL(1); /*the_x++;*/
+ PUT_PIXEL(2); /*the_x++;*/
+ PUT_PIXEL(3); /*the_x++;*/
+#ifdef INTERP_Z
+ pz += 4;
+#endif
+// pp = (PIXEL*)((GLbyte*)pp + 4 * PSZB);
+ pp += 4;
+ n -= 4;
+ }
+ while (n >= 0) {
+ PUT_PIXEL(0); /*the_x++;*/
+#ifdef INTERP_Z
+ pz += 1;
+#endif
+ pp = (PIXEL*)((GLbyte*)pp + PSZB);
+ n--;
+ }
+ }
+#else
+ DRAW_LINE();
+#endif
+
+ /* left edge */
+ error += derror;
+ if (error > 0) {
+ error -= 0x10000;
+ x1 += dxdy_max;
+#ifdef INTERP_Z
+ z1 += dzdl_max;
+#endif
+#ifdef INTERP_RGB
+ r1 += drdl_max;
+ g1 += dgdl_max;
+ b1 += dbdl_max;
+#endif
+#ifdef INTERP_ST
+ s1 += dsdl_max;
+ t1 += dtdl_max;
+#endif
+#ifdef INTERP_STZ
+ sz1 += dszdl_max;
+ tz1 += dtzdl_max;
+#endif
+ } else {
+ x1 += dxdy_min;
+#ifdef INTERP_Z
+ z1 += dzdl_min;
+#endif
+#ifdef INTERP_RGB
+ r1 += drdl_min;
+ g1 += dgdl_min;
+ b1 += dbdl_min;
+#endif
+#ifdef INTERP_ST
+ s1 += dsdl_min;
+ t1 += dtdl_min;
+#endif
+#ifdef INTERP_STZ
+ sz1 += dszdl_min;
+ tz1 += dtzdl_min;
+#endif
+ }
+
+ /* right edge */
+ x2 += dx2dy2;
+
+ /* screen coordinates */
+ pp1 = (PIXEL*)((GLbyte*)pp1 + zb->linesize);
+#if TGL_FEATURE_POLYGON_STIPPLE == 1
+ the_y++;
+#endif
+ pz1 += zb->xsize;
+ }
+ }
+}
+
+#undef INTERP_Z
+#undef INTERP_RGB
+#undef INTERP_ST
+#undef INTERP_STZ
+
+#undef DRAW_INIT
+#undef DRAW_LINE
+#undef PUT_PIXEL