ref: 9c620f78c79a668824dfd967598bcfc58a249e4e
parent: 08d0015032b413d93ea4e7aa18c26c94bf69f29d
author: David <gek@katherine>
date: Thu Mar 11 19:09:49 EST 2021
Automatic commit.
--- a/README.md
+++ b/README.md
@@ -48,6 +48,8 @@
#include <string.h>
#include <stdarg.h>
```
+If your system supports it, the library can also use `alignas` to align its matrix and vector types, which lets the compiler generate SIMD code.
+This can be disabled by setting `TGL_FEATURE_ALIGNAS` to 0 in `zfeatures.h`. Enabling it adds a dependency on `stdalign.h` but greatly increases vertex processing speed.
If you are unsure if your target platform can support TinyGL, compile it with the buildtime and runtime tests enabled (They are, by default)
--- a/include/zfeatures.h
+++ b/include/zfeatures.h
@@ -70,6 +70,15 @@
#define TGL_FEATURE_MULTITHREADED_COPY_TEXIMAGE_2D 1
//Enable multithreading the ZB_CopyBuffer operation.
#define TGL_FEATURE_MULTITHREADED_ZB_COPYBUFFER 1
+//Set to 1 to define TGL_ALIGN as alignas(16) (pulls in <stdalign.h>); any other value makes TGL_ALIGN expand to nothing.
+#define TGL_FEATURE_ALIGNAS 1
+
+#if TGL_FEATURE_ALIGNAS == 1
+#include <stdalign.h>
+#define TGL_ALIGN alignas(16)
+#else
+#define TGL_ALIGN /* intentionally empty: no alignment specifier is emitted */
+#endif
//DO NOT TURN THESE ON, I don't maintain them and I actively #error them out.
#define TGL_FEATURE_8_BITS 0
--- a/src/get.c
+++ b/src/get.c
@@ -145,6 +145,10 @@
#else
"TGL_FEATURE_SINGLE_THREADED "
#endif
+
+#if TGL_FEATURE_ALIGNAS
+"TGL_FEATURE_ALIGNAS "
+#endif
"TGL_BUFFER_EXT "
"TGL_FEEDBACK "
"TGL_SELECT "
--- a/src/zmath.c
+++ b/src/zmath.c
@@ -10,6 +10,7 @@
void gl_M4_Id(M4* a) {
GLint i, j;
+#pragma omp simd collapse(2)
for (i = 0; i < 4; i++)
for (j = 0; j < 4; j++)
if (i == j)
@@ -34,6 +35,7 @@
void gl_M4_Mul(M4* c, M4* a, M4* b) {
GLint i, j, k;
GLfloat s;
+#pragma omp simd
for (i = 0; i < 4; i++)
for (j = 0; j < 4; j++) {
s = 0.0;
@@ -52,7 +54,7 @@
/*memcpy(&a, c, 16*sizeof(GLfloat));
*/
a = *c;
-
+#pragma omp simd
for (i = 0; i < 4; i++)
for (j = 0; j < 4; j++) {
s = 0.0;
@@ -79,14 +81,17 @@
}
void gl_M4_MulV4(V4* a, M4* b, V4* c) {
- a->X = b->m[0][0] * c->X + b->m[0][1] * c->Y + b->m[0][2] * c->Z + b->m[0][3] * c->W;
- a->Y = b->m[1][0] * c->X + b->m[1][1] * c->Y + b->m[1][2] * c->Z + b->m[1][3] * c->W;
- a->Z = b->m[2][0] * c->X + b->m[2][1] * c->Y + b->m[2][2] * c->Z + b->m[2][3] * c->W;
- a->W = b->m[3][0] * c->X + b->m[3][1] * c->Y + b->m[3][2] * c->Z + b->m[3][3] * c->W;
+ {
+ a->X = b->m[0][0] * c->X + b->m[0][1] * c->Y + b->m[0][2] * c->Z + b->m[0][3] * c->W;
+ a->Y = b->m[1][0] * c->X + b->m[1][1] * c->Y + b->m[1][2] * c->Z + b->m[1][3] * c->W;
+ a->Z = b->m[2][0] * c->X + b->m[2][1] * c->Y + b->m[2][2] * c->Z + b->m[2][3] * c->W;
+ a->W = b->m[3][0] * c->X + b->m[3][1] * c->Y + b->m[3][2] * c->Z + b->m[3][3] * c->W;
+ }
}
/* transposition of a 4x4 matrix */
void gl_M4_Transpose(M4* a, M4* b) {
+{
a->m[0][0] = b->m[0][0];
a->m[0][1] = b->m[1][0];
a->m[0][2] = b->m[2][0];
@@ -107,11 +112,13 @@
a->m[3][2] = b->m[2][3];
a->m[3][3] = b->m[3][3];
}
+}
/* inversion of an orthogonal matrix of type Y=M.X+P */
void gl_M4_InvOrtho(M4* a, M4 b) {
GLint i, j;
GLfloat s;
+#pragma omp simd
for (i = 0; i < 3; i++)
for (j = 0; j < 3; j++)
a->m[i][j] = b.m[j][i];
@@ -119,8 +126,10 @@
a->m[3][1] = 0.0;
a->m[3][2] = 0.0;
a->m[3][3] = 1.0;
+
for (i = 0; i < 3; i++) {
s = 0;
+#pragma omp simd
for (j = 0; j < 3; j++)
s -= b.m[j][i] * b.m[j][3];
a->m[i][3] = s;
@@ -134,12 +143,12 @@
GLint i, j, k, l;
GLfloat max, tmp, t;
- /* identit�e dans r */
+ /* initialize r to the identity matrix */
+#pragma omp simd
for (i = 0; i < n * n; i++)
r[i] = 0;
for (i = 0; i < n; i++)
r[i * n + i] = 1;
-
for (j = 0; j < n; j++) {
/* recherche du nombre de plus grand module sur la colonne j */
@@ -157,6 +166,7 @@
/* permutation des lignes j et k */
if (k != j) {
+#pragma omp simd
for (i = 0; i < n; i++) {
tmp = m[j * n + i];
m[j * n + i] = m[k * n + i];
@@ -170,11 +180,11 @@
/* multiplication de la ligne j par 1/max */
max = 1 / max;
+#pragma omp simd
for (i = 0; i < n; i++) {
m[j * n + i] *= max;
r[j * n + i] *= max;
}
-
for (l = 0; l < n; l++)
if (l != j) {
t = m[l * n + j];
@@ -219,7 +229,6 @@
det = m->m[0][0] * m->m[1][1] * m->m[2][2] - m->m[0][0] * m->m[1][2] * m->m[2][1] - m->m[1][0] * m->m[0][1] * m->m[2][2] +
m->m[1][0] * m->m[0][2] * m->m[2][1] + m->m[2][0] * m->m[0][1] * m->m[1][2] - m->m[2][0] * m->m[0][2] * m->m[1][1];
-
a->m[0][0] = (m->m[1][1] * m->m[2][2] - m->m[1][2] * m->m[2][1]) / det;
a->m[0][1] = -(m->m[0][1] * m->m[2][2] - m->m[0][2] * m->m[2][1]) / det;
a->m[0][2] = -(-m->m[0][1] * m->m[1][2] + m->m[0][2] * m->m[1][1]) / det;
@@ -231,6 +240,7 @@
a->m[2][0] = (m->m[1][0] * m->m[2][1] - m->m[1][1] * m->m[2][0]) / det;
a->m[2][1] = -(m->m[0][0] * m->m[2][1] - m->m[0][1] * m->m[2][0]) / det;
a->m[2][2] = (m->m[0][0] * m->m[1][1] - m->m[0][1] * m->m[1][0]) / det;
+
}
/* vector arithmetic */
--- a/src/zmath.h
+++ b/src/zmath.h
@@ -1,6 +1,7 @@
#ifndef __ZMATH__
#define __ZMATH__
#include "../include/GL/gl.h"
+#include "../include/zfeatures.h"
#include <stdlib.h>
#include <string.h> //For memcpy
#include <math.h>
@@ -7,15 +8,15 @@
/* Matrix & Vertex */
typedef struct {
- GLfloat m[4][4];
+ TGL_ALIGN GLfloat m[4][4];
} M4;
typedef struct {
- GLfloat m[3][3];
+ TGL_ALIGN GLfloat m[3][3];
} M3;
typedef struct {
- GLfloat m[3][4];
+ TGL_ALIGN GLfloat m[3][4];
} M34;
#define X v[0]
@@ -24,11 +25,11 @@
#define W v[3]
typedef struct {
- GLfloat v[3];
+ TGL_ALIGN GLfloat v[3];
} V3;
typedef struct {
- GLfloat v[4];
+ TGL_ALIGN GLfloat v[4];
} V4;
void gl_M4_Id(M4* a);