shithub: tinygl

Download patch

ref: 9c620f78c79a668824dfd967598bcfc58a249e4e
parent: 08d0015032b413d93ea4e7aa18c26c94bf69f29d
author: David <gek@katherine>
date: Thu Mar 11 19:09:49 EST 2021

Automatic commit.

--- a/README.md
+++ b/README.md
@@ -48,6 +48,8 @@
 #include <string.h>
 #include <stdarg.h>
 ```
+If your system supports it, the library can also take advantage of `alignas` to get SIMD support,
+which can be disabled in `include/zfeatures.h` via `TGL_FEATURE_ALIGNAS`. This adds a dependency on `stdalign.h` but greatly increases vertex processing speed.
 
 If you are unsure if your target platform can support TinyGL, compile it with the buildtime and runtime tests enabled (They are, by default)
 
--- a/include/zfeatures.h
+++ b/include/zfeatures.h
@@ -70,6 +70,15 @@
 #define TGL_FEATURE_MULTITHREADED_COPY_TEXIMAGE_2D 1
 //Enable multithreading the ZB_CopyBuffer operation.
 #define TGL_FEATURE_MULTITHREADED_ZB_COPYBUFFER 1
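+//Enable 16-byte alignment of the math types via alignas from <stdalign.h>. Set to 0 to drop the stdalign.h dependency.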
+#define TGL_FEATURE_ALIGNAS 1
+
+#if TGL_FEATURE_ALIGNAS == 1
+#include <stdalign.h>
+#define TGL_ALIGN alignas(16)
+#else
+#define TGL_ALIGN /*a comment*/
+#endif
 
 //DO NOT TURN THESE ON, I don't maintain them and I actively #error them out.
 #define TGL_FEATURE_8_BITS         0
--- a/src/get.c
+++ b/src/get.c
@@ -145,6 +145,10 @@
 #else
 "TGL_FEATURE_SINGLE_THREADED "
 #endif
+
+#if TGL_FEATURE_ALIGNAS
+"TGL_FEATURE_ALIGNAS "
+#endif
 "TGL_BUFFER_EXT "
 "TGL_FEEDBACK "
 "TGL_SELECT "
--- a/src/zmath.c
+++ b/src/zmath.c
@@ -10,6 +10,7 @@
 
 void gl_M4_Id(M4* a) {
 	GLint i, j;
+#pragma omp simd collapse(2)
 	for (i = 0; i < 4; i++)
 		for (j = 0; j < 4; j++)
 			if (i == j)
@@ -34,6 +35,7 @@
 void gl_M4_Mul(M4* c, M4* a, M4* b) {
 	GLint i, j, k;
 	GLfloat s;
+#pragma omp simd
 	for (i = 0; i < 4; i++)
 		for (j = 0; j < 4; j++) {
 			s = 0.0;
@@ -52,7 +54,7 @@
 	/*memcpy(&a, c, 16*sizeof(GLfloat));
 	 */
 	a = *c;
-
+#pragma omp simd
 	for (i = 0; i < 4; i++)
 		for (j = 0; j < 4; j++) {
 			s = 0.0;
@@ -79,14 +81,17 @@
 }
 
 void gl_M4_MulV4(V4* a, M4* b, V4* c) {
-	a->X = b->m[0][0] * c->X + b->m[0][1] * c->Y + b->m[0][2] * c->Z + b->m[0][3] * c->W;
-	a->Y = b->m[1][0] * c->X + b->m[1][1] * c->Y + b->m[1][2] * c->Z + b->m[1][3] * c->W;
-	a->Z = b->m[2][0] * c->X + b->m[2][1] * c->Y + b->m[2][2] * c->Z + b->m[2][3] * c->W;
-	a->W = b->m[3][0] * c->X + b->m[3][1] * c->Y + b->m[3][2] * c->Z + b->m[3][3] * c->W;
+	{
+		a->X = b->m[0][0] * c->X + b->m[0][1] * c->Y + b->m[0][2] * c->Z + b->m[0][3] * c->W;
+		a->Y = b->m[1][0] * c->X + b->m[1][1] * c->Y + b->m[1][2] * c->Z + b->m[1][3] * c->W;
+		a->Z = b->m[2][0] * c->X + b->m[2][1] * c->Y + b->m[2][2] * c->Z + b->m[2][3] * c->W;
+		a->W = b->m[3][0] * c->X + b->m[3][1] * c->Y + b->m[3][2] * c->Z + b->m[3][3] * c->W;
+	}
 }
 
 /* transposition of a 4x4 matrix */
 void gl_M4_Transpose(M4* a, M4* b) {
+{
 	a->m[0][0] = b->m[0][0];
 	a->m[0][1] = b->m[1][0];
 	a->m[0][2] = b->m[2][0];
@@ -107,11 +112,13 @@
 	a->m[3][2] = b->m[2][3];
 	a->m[3][3] = b->m[3][3];
 }
+}
 
 /* inversion of an orthogonal matrix of type Y=M.X+P */
 void gl_M4_InvOrtho(M4* a, M4 b) {
 	GLint i, j;
 	GLfloat s;
+#pragma omp simd
 	for (i = 0; i < 3; i++)
 		for (j = 0; j < 3; j++)
 			a->m[i][j] = b.m[j][i];
@@ -119,8 +126,10 @@
 	a->m[3][1] = 0.0;
 	a->m[3][2] = 0.0;
 	a->m[3][3] = 1.0;
+
 	for (i = 0; i < 3; i++) {
 		s = 0;
+#pragma omp simd
 		for (j = 0; j < 3; j++)
 			s -= b.m[j][i] * b.m[j][3];
 		a->m[i][3] = s;
@@ -134,12 +143,12 @@
 	GLint i, j, k, l;
 	GLfloat max, tmp, t;
 
-	/* identit�e dans r */
+	/* set r to the identity matrix */
+#pragma omp simd
 	for (i = 0; i < n * n; i++)
 		r[i] = 0;
 	for (i = 0; i < n; i++)
 		r[i * n + i] = 1;
-
 	for (j = 0; j < n; j++) {
 
 		/* recherche du nombre de plus grand module sur la colonne j */
@@ -157,6 +166,7 @@
 
 		/* permutation des lignes j et k */
 		if (k != j) {
+#pragma omp simd
 			for (i = 0; i < n; i++) {
 				tmp = m[j * n + i];
 				m[j * n + i] = m[k * n + i];
@@ -170,11 +180,11 @@
 
 		/* multiplication de la ligne j par 1/max */
 		max = 1 / max;
+#pragma omp simd
 		for (i = 0; i < n; i++) {
 			m[j * n + i] *= max;
 			r[j * n + i] *= max;
 		}
-
 		for (l = 0; l < n; l++)
 			if (l != j) {
 				t = m[l * n + j];
@@ -219,7 +229,6 @@
 
 	det = m->m[0][0] * m->m[1][1] * m->m[2][2] - m->m[0][0] * m->m[1][2] * m->m[2][1] - m->m[1][0] * m->m[0][1] * m->m[2][2] +
 		  m->m[1][0] * m->m[0][2] * m->m[2][1] + m->m[2][0] * m->m[0][1] * m->m[1][2] - m->m[2][0] * m->m[0][2] * m->m[1][1];
-
 	a->m[0][0] = (m->m[1][1] * m->m[2][2] - m->m[1][2] * m->m[2][1]) / det;
 	a->m[0][1] = -(m->m[0][1] * m->m[2][2] - m->m[0][2] * m->m[2][1]) / det;
 	a->m[0][2] = -(-m->m[0][1] * m->m[1][2] + m->m[0][2] * m->m[1][1]) / det;
@@ -231,6 +240,7 @@
 	a->m[2][0] = (m->m[1][0] * m->m[2][1] - m->m[1][1] * m->m[2][0]) / det;
 	a->m[2][1] = -(m->m[0][0] * m->m[2][1] - m->m[0][1] * m->m[2][0]) / det;
 	a->m[2][2] = (m->m[0][0] * m->m[1][1] - m->m[0][1] * m->m[1][0]) / det;
+
 }
 
 /* vector arithmetic */
--- a/src/zmath.h
+++ b/src/zmath.h
@@ -1,6 +1,7 @@
 #ifndef __ZMATH__
 #define __ZMATH__
 #include "../include/GL/gl.h"
+#include "../include/zfeatures.h"
 #include <stdlib.h>
 #include <string.h> //For memcpy
 #include <math.h>
@@ -7,15 +8,15 @@
 /* Matrix & Vertex */
 
 typedef struct {
-	GLfloat m[4][4];
+	TGL_ALIGN GLfloat m[4][4];
 } M4;
 
 typedef struct {
-	GLfloat m[3][3];
+	TGL_ALIGN GLfloat m[3][3];
 } M3;
 
 typedef struct {
-	GLfloat m[3][4];
+	TGL_ALIGN GLfloat m[3][4];
 } M34;
 
 #define X v[0]
@@ -24,11 +25,11 @@
 #define W v[3]
 
 typedef struct {
-	GLfloat v[3];
+	TGL_ALIGN GLfloat v[3];
 } V3;
 
 typedef struct {
-	GLfloat v[4];
+	TGL_ALIGN GLfloat v[4];
 } V4;
 
 void gl_M4_Id(M4* a);