shithub: leaf

Download patch

ref: f253eaf415bc6455db4d38dd4804d7136d874705
parent: a8e415891948209f24d7add13c3930706f895c18
author: Matthew Wang <mjw7@princeton.edu>
date: Mon Nov 9 12:58:36 EST 2020

trying out some ways to improve and optimize pitch detection

--- a/leaf/Inc/leaf-analysis.h
+++ b/leaf/Inc/leaf-analysis.h
@@ -909,6 +909,7 @@
         float _mean;
         float _predicted_frequency;
         int _first;
+        int sub;
         
     } _tDualPitchDetector;
     
--- a/leaf/Inc/leaf-effects.h
+++ b/leaf/Inc/leaf-effects.h
@@ -430,11 +430,11 @@
      
      @} */
     
-#define LOOPSIZE (2048*2)      // (4096*2) // loop size must be power of two
-#define LOOPMASK (LOOPSIZE - 1)
+//#define LOOPSIZE (2048*2)      // (4096*2) // loop size must be power of two
+//#define LOOPMASK (LOOPSIZE - 1)
 #define PITCHFACTORDEFAULT 1.0f
 #define INITPERIOD 64.0f
-#define MAXPERIOD (float)((LOOPSIZE - w->blocksize) * 0.8f)
+//#define MAXPERIOD (float)((LOOPSIZE - w->blocksize) * 0.8f)
 #define MINPERIOD 8.0f
     
     typedef struct _tSOLAD
@@ -444,6 +444,7 @@
         tAttackDetection ad;
         tHighpass hp;
         
+        int loopSize;
         uint16_t timeindex;              // current reference time, write index
         uint16_t blocksize;              // signal input / output block size
         float pitchfactor;        // pitch factor between 0.25 and 4
@@ -459,8 +460,8 @@
     
     typedef _tSOLAD* tSOLAD;
     
-    void    tSOLAD_init             (tSOLAD* const, LEAF* const leaf);
-    void    tSOLAD_initToPool       (tSOLAD* const, tMempool* const);
+    void    tSOLAD_init             (tSOLAD* const, int loopSize, LEAF* const leaf);
+    void    tSOLAD_initToPool       (tSOLAD* const, int loopSize, tMempool* const);
     void    tSOLAD_free             (tSOLAD* const);
     
     // send one block of input samples, receive one block of output samples
@@ -521,17 +522,21 @@
         float* inBuffer;
         int bufSize;
         int index;
+        
+        float pickiness;
     } _tPitchShift;
     
     typedef _tPitchShift* tPitchShift;
     
-    void    tPitchShift_init (tPitchShift* const, tDualPitchDetector* const, LEAF* const leaf);
-    void    tPitchShift_initToPool (tPitchShift* const, tDualPitchDetector* const, tMempool* const);
+    void    tPitchShift_init (tPitchShift* const, tDualPitchDetector* const, int bufSize, LEAF* const leaf);
+    void    tPitchShift_initToPool (tPitchShift* const, tDualPitchDetector* const, int bufSize, tMempool* const);
     void    tPitchShift_free (tPitchShift* const);
     
-    void    tPitchShift_shiftBy (tPitchShift* const, float factor, float* in, float* out, int bufSize);
-    void    tPitchShift_shiftTo (tPitchShift* const, float freq, float* in, float* out, int bufSize);
+    void    tPitchShift_shiftBy (tPitchShift* const, float factor, float* in, float* out);
+    void    tPitchShift_shiftTo (tPitchShift* const, float freq, float* in, float* out);
     
+    void    tPitchShift_setPickiness (tPitchShift* const, float p);
+    
     /*!
      @defgroup tsimpleretune tSimpleRetune
      @ingroup effects
@@ -582,7 +587,7 @@
         int bufSize;
         int index;
         
-        void (*shiftFunction)(tPitchShift* const, float, float*, float*, int);
+        void (*shiftFunction)(tPitchShift* const, float, float*, float*);
         
         float* shiftValues;
         int numVoices;
@@ -597,6 +602,7 @@
     float   tSimpleRetune_tick                  (tSimpleRetune* const, float sample);
     void    tSimpleRetune_setMode               (tSimpleRetune* const, int mode);
     void    tSimpleRetune_setNumVoices          (tSimpleRetune* const, int numVoices);
+    void    tSimpleRetune_setPickiness          (tSimpleRetune* const, float p);
     void    tSimpleRetune_tuneVoices            (tSimpleRetune* const, float* t);
     void    tSimpleRetune_tuneVoice             (tSimpleRetune* const, int voice, float t);
     float   tSimpleRetune_getInputFrequency     (tSimpleRetune* const);
@@ -653,7 +659,7 @@
         
         float* output;
         
-        void (*shiftFunction)(tPitchShift* const, float, float*, float*, int);
+        void (*shiftFunction)(tPitchShift* const, float, float*, float*);
         
         float* shiftValues;
         int numVoices;
@@ -668,6 +674,7 @@
     float*  tRetune_tick                (tRetune* const, float sample);
     void    tRetune_setMode             (tRetune* const, int mode);
     void    tRetune_setNumVoices        (tRetune* const, int numVoices);
+    void    tRetune_setPickiness        (tRetune* const, float p);
     void    tRetune_tuneVoices          (tRetune* const, float* t);
     void    tRetune_tuneVoice           (tRetune* const, int voice, float t);
     float   tRetune_getInputFrequency   (tRetune* const);
--- a/leaf/Src/leaf-analysis.c
+++ b/leaf/Src/leaf-analysis.c
@@ -1889,7 +1889,7 @@
     _tPitchDetector* p = *detector = (_tPitchDetector*) mpool_alloc(sizeof(_tPitchDetector), m);
     p->mempool = m;
     
-    tPeriodDetector_initToPool(&p->_pd, lowestFreq, highestFreq, DEFAULT_HYSTERESIS, mempool);
+    tPeriodDetector_initToPool(&p->_pd, lowestFreq, highestFreq, -120.0f, mempool);
     p->_current.frequency = 0.0f;
     p->_current.periodicity = 0.0f;
     p->_frames_after_shift = 0;
@@ -2119,13 +2119,14 @@
     _tDualPitchDetector* p = *detector = (_tDualPitchDetector*) mpool_alloc(sizeof(_tDualPitchDetector), m);
     p->mempool = m;
     
-    tPitchDetector_initToPool(&p->_pd1, lowestFreq, highestFreq, mempool);
-    tPitchDetector_initToPool(&p->_pd2, lowestFreq, highestFreq, mempool);
+    tPitchDetector_initToPool(&p->_pd1, lowestFreq*2.0f, highestFreq*2.0f, mempool);
+    tPitchDetector_initToPool(&p->_pd2, lowestFreq*2.0f, highestFreq*2.0f, mempool);
     p->_current.frequency = 0.0f;
     p->_current.periodicity = 0.0f;
     p->_mean = lowestFreq + ((highestFreq - lowestFreq) / 2.0f);
     p->_predicted_frequency = 0.0f;
     p->_first = 1;
+    p->sub = 0;
 }
 
 void    tDualPitchDetector_free (tDualPitchDetector* const detector)
@@ -2142,35 +2143,53 @@
 {
     _tDualPitchDetector* p = *detector;
     
-    int pd1_ready = tPitchDetector_tick(&p->_pd1, sample);
-    int pd2_ready = tPitchDetector_tick(&p->_pd2, -sample);
+    int pd1_ready;
+    if (!(p->sub % 2))
+    {
+        pd1_ready = tPitchDetector_tick(&p->_pd2, sample);
+        p->sub = 0;
+    }
+    else
+    {
+        pd1_ready = tPitchDetector_tick(&p->_pd1, sample);
+    }
+    p->sub++;
     
-    if (pd1_ready || pd2_ready)
+    if (pd1_ready)
     {
         int pd1_indeterminate = tPitchDetector_indeterminate(&p->_pd1);
         int pd2_indeterminate = tPitchDetector_indeterminate(&p->_pd2);
+        int disagreement = 0;
         if (!pd1_indeterminate && !pd2_indeterminate)
         {
             _pitch_info _i1 = p->_pd1->_current;
             _pitch_info _i2 = p->_pd2->_current;
             
-            float pd1_diff = fabsf(_i1.frequency - p->_mean);
-            float pd2_diff = fabsf(_i2.frequency - p->_mean);
-            _pitch_info i = (pd1_diff < pd2_diff) ? _i1 : _i2;
+            float pd1_diff = fabsf(_i1.frequency*0.5f - p->_mean);
+            float pd2_diff = fabsf(_i2.frequency*0.5f - p->_mean);
+            _pitch_info i1 = _i1;
+            i1.frequency = i1.frequency*0.5f;
+            _pitch_info i2 = _i2;
+            i2.frequency = i2.frequency*0.5f;
+    
+            _pitch_info i = (pd1_diff < pd2_diff) ? i1 : i2;
+            disagreement = fabsf(pd1_diff - pd2_diff) > 2.0f;
             
-            if (p->_first)
-            {
-                p->_current = i;
-                p->_mean = p->_current.frequency;
-                p->_first = 0;
-                p->_predicted_frequency = 0.0f;
+            if (!disagreement) {
+                if (p->_first)
+                {
+                    p->_current = i;
+                    p->_mean = p->_current.frequency;
+                    p->_first = 0;
+                    p->_predicted_frequency = 0.0f;
+                }
+                else
+                {
+                    p->_current = i;
+                    p->_mean = (0.2222222f * p->_current.frequency) + (0.7777778f * p->_mean);
+                    p->_predicted_frequency = 0.0f;
+                }
             }
-            else
-            {
-                p->_current = i;
-                p->_mean = (0.222222f * p->_current.frequency) + (0.777778f * p->_mean);
-                p->_predicted_frequency = 0.0f;
-            }
         }
         
         if (pd1_indeterminate && pd2_indeterminate)
@@ -2181,7 +2200,7 @@
         }
     }
     
-    return pd1_ready || pd2_ready;
+    return pd1_ready;
 }
 
 float   tDualPitchDetector_getFrequency    (tDualPitchDetector* const detector)
--- a/leaf/Src/leaf-effects.c
+++ b/leaf/Src/leaf-effects.c
@@ -1019,19 +1019,20 @@
 /******************************************************************************/
 
 // init
-void tSOLAD_init (tSOLAD* const wp, LEAF* const leaf)
+void tSOLAD_init (tSOLAD* const wp, int loopSize, LEAF* const leaf)
 {
-    tSOLAD_initToPool(wp, &leaf->mempool);
+    tSOLAD_initToPool(wp, loopSize, &leaf->mempool);
 }
 
-void tSOLAD_initToPool (tSOLAD* const wp, tMempool* const mp)
+void tSOLAD_initToPool (tSOLAD* const wp, int loopSize, tMempool* const mp)
 {
     _tMempool* m = *mp;
     _tSOLAD* w = *wp = (_tSOLAD*) mpool_calloc(sizeof(_tSOLAD), m);
     w->mempool = m;
     
+    w->loopSize = loopSize;
     w->pitchfactor = 1.;
-    w->delaybuf = (float*) mpool_calloc(sizeof(float) * LOOPSIZE, m);
+    w->delaybuf = (float*) mpool_calloc(sizeof(float) * (w->loopSize + 1), m); // +1: delaybuf[loopSize] mirrors delaybuf[0] for interpolation
 
     w->timeindex = 0;
     w->xfadevalue = -1;
@@ -1065,7 +1066,7 @@
     {
         float sample = tHighpass_tick(&w->hp, in[0]);
         w->delaybuf[0] = sample;
-        w->delaybuf[LOOPSIZE] = sample;   // copy one sample for interpolation
+        w->delaybuf[w->loopSize] = sample;   // copy one sample for interpolation
         n--;
         i++;
         in++;
@@ -1082,7 +1083,7 @@
     else pitchdown(w, out);
     
     w->timeindex += blocksize;
-    w->timeindex &= LOOPMASK;
+    w->timeindex &= (w->loopSize - 1);
 }
 
 // set periodicity analysis data
@@ -1090,7 +1091,8 @@
 {
     _tSOLAD* w = *wp;
     
-    if(period > MAXPERIOD) period = MAXPERIOD;
+    float maxPeriod = (float)((w->loopSize - w->blocksize) * 0.8f);
+    if(period > maxPeriod) period = maxPeriod;
     if(period > MINPERIOD) w->period = period;  // ignore period when too small
 }
 
@@ -1126,7 +1128,7 @@
 {
     _tSOLAD* w = *wp;
     
-    int n = LOOPSIZE;
+    int n = w->loopSize;
     float *buf = w->delaybuf;
     
     while(n--) *buf++ = 0;
@@ -1173,7 +1175,7 @@
 static void pitchdown(_tSOLAD* const w, float *out)
 {
     int n = w->blocksize;
-    float refindex = (float)(w->timeindex + LOOPSIZE); // no negative values!
+    float refindex = (float)(w->timeindex + w->loopSize); // no negative values!
     float pitchfactor = w->pitchfactor;
     float period = w->period;
     float readlag = w->readlag;
@@ -1291,7 +1293,7 @@
 static void pitchup(_tSOLAD* const w, float *out)
 {
     int n = w->blocksize;
-    float refindex = (float)(w->timeindex + LOOPSIZE); // no negative values
+    float refindex = (float)(w->timeindex + w->loopSize); // no negative values
     float pitchfactor = w->pitchfactor;
     float period = w->period;
     float readlag = w->readlag;
@@ -1359,7 +1361,7 @@
     int index = (int)floatindex;
     float fraction = floatindex - (float)index;
     float *buf = w->delaybuf;
-    index &= LOOPMASK;
+    index &= (w->loopSize - 1);
     
     return (buf[index] + (fraction * (buf[index+1] - buf[index])));
 }
@@ -1368,12 +1370,12 @@
 // PITCHSHIFT
 //============================================================================================================
 
-void tPitchShift_init (tPitchShift* const psr, tDualPitchDetector* const dpd, LEAF* const leaf)
+void tPitchShift_init (tPitchShift* const psr, tDualPitchDetector* const dpd, int bufSize, LEAF* const leaf)
 {
-    tPitchShift_initToPool(psr, dpd, &leaf->mempool);
+    tPitchShift_initToPool(psr, dpd, bufSize, &leaf->mempool);
 }
 
-void tPitchShift_initToPool (tPitchShift* const psr, tDualPitchDetector* const dpd, tMempool* const mp)
+void tPitchShift_initToPool (tPitchShift* const psr, tDualPitchDetector* const dpd, int bufSize, tMempool* const mp)
 {
     _tMempool* m = *mp;
     _tPitchShift* ps = *psr = (_tPitchShift*) mpool_alloc(sizeof(_tPitchShift), m);
@@ -1380,8 +1382,10 @@
     ps->mempool = m;
     
     ps->pd = *dpd;
+    ps->bufSize = bufSize;
+    ps->pickiness = 0.95f;
     
-    tSOLAD_initToPool(&ps->sola, mp);
+    tSOLAD_initToPool(&ps->sola, pow(2.0, ceil(log2(ps->bufSize * 2.0))), mp);
     tSOLAD_setPitchFactor(&ps->sola, DEFPITCHRATIO);
 }
 
@@ -1393,13 +1397,14 @@
     mpool_free((char*)ps, ps->mempool);
 }
 
-void tPitchShift_shiftBy (tPitchShift* const psr, float factor, float* in, float* out, int bufSize)
+void tPitchShift_shiftBy (tPitchShift* const psr, float factor, float* in, float* out)
 {
     _tPitchShift* ps = *psr;
     LEAF* leaf = ps->mempool->leaf;
     
     float detected = tDualPitchDetector_getFrequency(&ps->pd);
-    if (detected > 0.0f)
+    float periodicity = tDualPitchDetector_getPeriodicity(&ps->pd);
+    if (detected > 0.0f && periodicity > ps->pickiness)
     {
         float period = leaf->sampleRate / detected;
         tSOLAD_setPeriod(&ps->sola, period);
@@ -1406,16 +1411,17 @@
         tSOLAD_setPitchFactor(&ps->sola, factor);
     }
         
-    tSOLAD_ioSamples(&ps->sola, in, out, bufSize);
+    tSOLAD_ioSamples(&ps->sola, in, out, ps->bufSize);
 }
 
-void    tPitchShift_shiftTo (tPitchShift* const psr, float freq, float* in, float* out, int bufSize)
+void    tPitchShift_shiftTo (tPitchShift* const psr, float freq, float* in, float* out)
 {
     _tPitchShift* ps = *psr;
     LEAF* leaf = ps->mempool->leaf;
     
     float detected = tDualPitchDetector_getFrequency(&ps->pd);
-    if (detected > 0.0f)
+    float periodicity = tDualPitchDetector_getPeriodicity(&ps->pd);
+    if (detected > 0.0f && periodicity > ps->pickiness)
     {
         float period = 1.0f / detected;
         float factor = freq * period;
@@ -1423,10 +1429,17 @@
         tSOLAD_setPitchFactor(&ps->sola, factor);
     }
     
-    tSOLAD_ioSamples(&ps->sola, in, out, bufSize);
+    tSOLAD_ioSamples(&ps->sola, in, out, ps->bufSize);
 }
 
+void    tPitchShift_setPickiness (tPitchShift* const psr, float p)
+{
+    _tPitchShift* ps = *psr;
+    
+    ps->pickiness = p;
+}
 
+
 //============================================================================================================
 // SIMPLERETUNE
 //============================================================================================================
@@ -1459,7 +1472,7 @@
     
     for (int i = 0; i < r->numVoices; ++i)
     {
-        tPitchShift_initToPool(&r->ps[i], &r->dp, mp);
+        tPitchShift_initToPool(&r->ps[i], &r->dp, r->bufSize, mp);
     }
     
     r->shiftFunction = &tPitchShift_shiftBy;
@@ -1495,7 +1508,7 @@
     {
         for (int i = 0; i < r->numVoices; ++i)
         {
-            r->shiftFunction(&r->ps[i], r->shiftValues[i], r->inBuffer, r->outBuffer, r->bufSize);
+            r->shiftFunction(&r->ps[i], r->shiftValues[i], r->inBuffer, r->outBuffer);
         }
         r->index = 0;
     }
@@ -1524,6 +1537,16 @@
     tSimpleRetune_initToPool(rt, minInputFreq, maxInputFreq, numVoices, bufSize, &mempool);
 }
 
+void tSimpleRetune_setPickiness (tSimpleRetune* const rt, float p)
+{
+    _tSimpleRetune* r = *rt;
+    
+    for (int i = 0; i < r->numVoices; ++i)
+    {
+        tPitchShift_setPickiness(&r->ps[i], p);
+    }
+}
+
 void tSimpleRetune_tuneVoices(tSimpleRetune* const rt, float* t)
 {
     _tSimpleRetune* r = *rt;
@@ -1581,7 +1604,7 @@
 
     for (int i = 0; i < r->numVoices; ++i)
     {
-        tPitchShift_initToPool(&r->ps[i], &r->dp, mp);
+        tPitchShift_initToPool(&r->ps[i], &r->dp, r->bufSize, mp);
         r->outBuffers[i] = (float*) mpool_calloc(sizeof(float) * r->bufSize, m);
     }
     
@@ -1623,7 +1646,7 @@
     {
         for (int i = 0; i < r->numVoices; ++i)
         {
-            r->shiftFunction(&r->ps[i], r->shiftValues[i], r->inBuffer, r->outBuffers[i], r->bufSize);
+            r->shiftFunction(&r->ps[i], r->shiftValues[i], r->inBuffer, r->outBuffers[i]);
         }
         r->index = 0;
     }
@@ -1637,6 +1660,16 @@
     
     if (mode > 0) r->shiftFunction = &tPitchShift_shiftTo;
     else r->shiftFunction = &tPitchShift_shiftBy;
+}
+
+void tRetune_setPickiness (tRetune* const rt, float p)
+{
+    _tRetune* r = *rt;
+    
+    for (int i = 0; i < r->numVoices; ++i)
+    {
+        tPitchShift_setPickiness(&r->ps[i], p);
+    }
 }
 
 void tRetune_setNumVoices(tRetune* const rt, int numVoices)