shithub: leaf

Download patch

ref: fcda8e4a7dca01b8ac73219d7cc38aee6d3de318
parent: 41a57875ac525c47ff92e823218eabdb45c7bad0
author: Matthew Wang <mjw7@princeton.edu>
date: Tue Jul 21 12:26:03 EDT 2020

improving popcount function, use compiler builtins when possible

--- a/leaf/Src/leaf-analysis.c
+++ b/leaf/Src/leaf-analysis.c
@@ -1061,7 +1061,7 @@
 
 
     z->_info = (tZeroCrossingInfo*) mpool_alloc(sizeof(tZeroCrossingInfo) * z->_size, m);
-    for (uint i = 0; i < z->_size; i++)
+    for (unsigned i = 0; i < z->_size; i++)
         tZeroCrossingInfo_initToPool(&z->_info[i], mp);
 
     z->_pos = 0;
@@ -1446,21 +1446,33 @@
     
     if (shift == 0)
     {
-        for (uint i = 0; i != b->_mid_array; ++i)
+        for (unsigned i = 0; i != b->_mid_array; ++i)
+        {
             // built in compiler popcount functions should be faster but we want this to be portable
             // could try to add some define that call the correct function depending on compiler
             // or let the user pointer popcount() to whatever they want
             // something to look into...
+#ifdef __GNUC__
+            count += __builtin_popcount(*p1++ ^ *p2++);
+#elif _MSC_VER
+            count += __popcnt(*p1++ ^ *p2++);
+#endif
             count += popcount(*p1++ ^ *p2++);
+        }
     }
     else
     {
         const int shift2 = value_size - shift;
-        for (uint i = 0; i != b->_mid_array; ++i)
+        for (unsigned i = 0; i != b->_mid_array; ++i)
         {
             unsigned int v = *p2++ >> shift;
             v |= *p2 << shift2;
-            count += popcount(*p1++ ^ v);
+#ifdef __GNUC__
+            count += __builtin_popcount(*p1++ ^ *p2++);
+#elif _MSC_VER
+            count += __popcnt(*p1++ ^ *p2++);
+#endif
+            count += popcount(*p1++ ^ *p2++);
         }
     }
     return count;
--- a/leaf/Src/leaf-delay.c
+++ b/leaf/Src/leaf-delay.c
@@ -58,7 +58,7 @@
 void    tDelay_clear(tDelay* const dl)
 {
     _tDelay* d = *dl;
-    for (uint i = 0; i < d->maxDelay; i++)
+    for (unsigned i = 0; i < d->maxDelay; i++)
     {
         d->buff[i] = 0;
     }
@@ -203,7 +203,7 @@
 void    tLinearDelay_clear(tLinearDelay* const dl)
 {
     _tLinearDelay* d = *dl;
-    for (uint i = 0; i < d->maxDelay; i++)
+    for (unsigned i = 0; i < d->maxDelay; i++)
     {
         d->buff[i] = 0;
     }
@@ -397,7 +397,7 @@
 void    tHermiteDelay_clear(tHermiteDelay* const dl)
 {
     _tHermiteDelay* d = *dl;
-    for (uint i = 0; i < d->maxDelay; i++)
+    for (unsigned i = 0; i < d->maxDelay; i++)
     {
         d->buff[i] = 0;
     }
@@ -593,7 +593,7 @@
 void tAllpassDelay_clear(tAllpassDelay* const dl)
 {
     _tAllpassDelay* d = *dl;
-    for (uint i = 0; i < d->maxDelay; i++)
+    for (unsigned i = 0; i < d->maxDelay; i++)
     {
         d->buff[i] = 0;
     }
@@ -763,7 +763,7 @@
 void tTapeDelay_clear(tTapeDelay* const dl)
 {
     _tTapeDelay* d = *dl;
-    for (uint i = 0; i < d->maxDelay; i++)
+    for (unsigned i = 0; i < d->maxDelay; i++)
     {
         d->buff[i] = 0;
     }
--- a/leaf/Src/leaf-math.c
+++ b/leaf/Src/leaf-math.c
@@ -731,10 +731,16 @@
 // something to look into...
 int popcount(unsigned int x)
 {
-    int c = 0;
-    for (; x != 0; x &= x - 1)
-        c++;
-    return c;
+    // Count the set bits of x with a branch-free SWAR reduction. A single
+    // 64-bit multiply-and-mask only covers inputs below 2^15, so it cannot
+    // be used for a full word. Assumes unsigned int is 32 bits wide.
+    x = x - ((x >> 1) & 0x55555555u);                 // 2-bit partial sums
+    x = (x & 0x33333333u) + ((x >> 2) & 0x33333333u); // 4-bit partial sums
+    x = (x + (x >> 4)) & 0x0F0F0F0Fu;                 // 8-bit partial sums
+    // Multiplying by 0x01010101 adds the four byte sums into the top byte.
+    x = (x * 0x01010101u) >> 24;
+    // The result is at most 32, so the conversion to int is lossless.
+    return (int) x;
 }
 
 float median3f(float a, float b, float c)
--- a/leaf/Src/leaf-sampling.c
+++ b/leaf/Src/leaf-sampling.c
@@ -80,7 +80,7 @@
 void  tBuffer_read(tBuffer* const sb, float* buff, uint32_t len)
 {
     _tBuffer* s = *sb;
-    for (uint i = 0; i < s->bufferLength; i++)
+    for (unsigned i = 0; i < s->bufferLength; i++)
     {
         if (i < len)    s->buff[i] = buff[i];
         else            s->buff[i] = 0.f;
@@ -129,7 +129,7 @@
 void  tBuffer_clear (tBuffer* const sb)
 {
     _tBuffer* s = *sb;
-    for (uint i = 0; i < s->bufferLength; i++)
+    for (unsigned i = 0; i < s->bufferLength; i++)
     {
         s->buff[i] = 0.f;
     }