shithub: aubio

--- /dev/null

+++ b/python/tests/eval_pitch

@@ -1,0 +1,143 @@

+#! /usr/bin/env python

+"""

+Script to evaluate pitch algorithms against TONAS database.

+See http://mtg.upf.edu/download/datasets/tonas/

+Example run:

+    $ ./eval_pitch /path/to/TONAS/*/*.wav

+    OK:  94.74% vx r:  96.87% vx f:  15.83% f0:  96.02% %12:   0.50% /path/to/TONAS/Deblas/01-D_AMairena.wav

+    OK:  89.89% vx r:  93.21% vx f:  13.81% f0:  90.74% %12:   1.51% /path/to/TONAS/Deblas/02-D_ChanoLobato.wav

+    OK:  96.02% vx r:  96.73% vx f:  10.91% f0:  96.42% %12:   0.00% /path/to/TONAS/Deblas/03-D_Chocolate.wav

+    [...]

+    OK:  82.35% vx r:  95.52% vx f:  67.09% f0:  89.80% %12:   0.95% /path/to/TONAS/Martinetes2/80-M2_Rancapinos.wav

+    OK:  61.97% vx r:  85.71% vx f:  22.03% f0:  55.63% %12:   8.57% /path/to/TONAS/Martinetes2/81-M2_SDonday.wav

+    OK:  75.26% vx r:  91.63% vx f:  27.27% f0:  75.99% %12:   5.05% /path/to/TONAS/Martinetes2/82-M2_TiaAnicalaPiriniaca.wav

+    OK:  82.77% vx r:  92.74% vx f:  38.27% f0:  87.33% %12:   1.67% 69 files, total_length: 1177.69s, total runtime: 25.91s

+"""

+import sys

+import time

+import os.path

+import numpy

+from utils import array_from_text_file, array_from_yaml_file

+from aubio import source, pitch, freqtomidi

+start = time.time()  # reference point for the total runtime printed at the end

+# largest distance between two freqtomidi() values still counted as a match

+freq_tol = .50

+methods = ["default", "yinfft", "mcomb", "yin", "fcomb", "schmitt", "specacf"]

+# pitch detection method under evaluation

+method = methods[0]

+# silence threshold handed to the pitch detector

+silence = -40.

+# number of leading detector results dropped before comparing with the annotation

+skip = 1

+# per-method (downsample, tolerance) overrides; other methods use (1, 0.35)

+_method_settings = {

+    "default": (1, 0.45),

+    "yinfft": (1, 0.45),

+    "mcomb": (4, 0.35),

+    "yin": (4, 0.2),

+}

+downsample, tolerance = _method_settings.get(method, (1, 0.35))

+# floor division keeps these sizes integers even where `/` is true division

+samplerate = 44100 // downsample

+hop_s = 512 // downsample

+win_s = 2048 // downsample

+def get_pitches (filename, samplerate = samplerate, win_s = win_s, hop_s = hop_s):

+    """Run the configured pitch detector over one sound file.

+    The file is read `hop_s` frames at a time and the module-level `method`,

+    `tolerance` and `silence` settings are applied to the detector.

+    Returns a numpy array with one row per block read, each row being

+    [frames read before the block / samplerate, detector output in "freq" unit].

+    """

+    reader = source(filename, samplerate, hop_s)

+    # from here on, use the rate reported by the opened source

+    samplerate = reader.samplerate

+    detector = pitch(method, win_s, hop_s, samplerate)

+    detector.set_unit("freq")

+    detector.set_tolerance(tolerance)

+    detector.set_silence(silence)

+    # one [time, pitch] row per block

+    rows = []

+    # number of frames read before the current block

+    frames_seen = 0

+    # a block shorter than hop_s is the last one; it is still analysed

+    n_read = hop_s

+    while n_read >= hop_s:

+        block, n_read = reader()

+        estimate = detector(block)[0]

+        rows.append([frames_seen/float(samplerate), estimate])

+        frames_seen += n_read

+    return numpy.array(rows)

+def _percent(part, whole):

+    """Return 100. * part / whole, or NaN when `whole` is zero."""

+    if whole == 0:

+        # undefined ratio, e.g. no voiced (or unvoiced) frame, or no file at all

+        return float("nan")

+    return 100. * part / whole

+def _format_scores(total, voiced, unvoiced, correct_sil, correct_f0, correct_chroma, missed, fp):

+    """Return the five score columns (OK, vx r, vx f, f0, %12) as one string."""

+    return " ".join([

+        "OK: %6s%%" % ("%.2f" % _percent(correct_f0 + correct_sil, total)),

+        "vx r: %6s%%" % ("%.2f" % (100. - _percent(missed, voiced))),

+        "vx f: %6s%%" % ("%.2f" % _percent(fp, unvoiced)),

+        "f0: %6s%%" % ("%.2f" % _percent(correct_f0, voiced)),

+        "%%12: %6s%%" % ("%.2f" % _percent(correct_chroma, voiced)),

+    ])

+# counters accumulated over every evaluated file

+total_correct_f0, total_correct_sil, total_missed, total_incorrect, total_fp, total_total = 0, 0, 0, 0, 0, 0

+total_correct_chroma, total_voiced = 0, 0

+for source_file in sys.argv[1:]:

+    # the annotation is looked up next to the sound file, extension swapped

+    ground_truth_file = os.path.splitext(source_file)[0] + '.f0.Corrected'

+    if not os.path.isfile(ground_truth_file):

+        print(" ".join(["ERR", "could not find ground_truth_file", ground_truth_file]))

+        continue

+    # keep columns 0 and 2 of the annotation; used below as (time, f0)

+    ground_truth = array_from_text_file(ground_truth_file)[:,[0,2]]

+    experiment = get_pitches(source_file)

+    # check that we have the same length, more or less one frame

+    assert abs(len(ground_truth) - len(experiment)) < 2, source_file

+    # align experiment by skipping first results

+    experiment = experiment[skip:]

+    experiment[:,0] -= experiment[0,0]

+    # trim to shortest list

+    maxlen = min(len(ground_truth), len(experiment))

+    experiment = experiment[:maxlen]

+    ground_truth = ground_truth[:maxlen]

+    # get difference matrix

+    diffmat = abs(experiment - ground_truth)

+    # make sure we got the timing right

+    assert max(diffmat[:,0]) < 10e-4, source_file

+    truth_pitches = freqtomidi(ground_truth[:,1])

+    exper_pitches = freqtomidi(experiment[:,1])

+    total = len(truth_pitches)

+    # a value of 0 marks a frame without pitch

+    unvoiced = len(truth_pitches[truth_pitches == 0])

+    voiced = total - unvoiced

+    correct_sil, fp, missed, correct_f0, correct_chroma, incorrect = 0, 0, 0, 0, 0, 0

+    for a, b in zip(truth_pitches, exper_pitches):

+        if a == 0:

+            if b == 0:

+                correct_sil += 1

+            else:

+                fp += 1

+        elif b == 0:

+            missed += 1

+        else:

+            error = abs(b - a)

+            # distance to the nearest multiple of 12, from either side

+            wrapped = error % 12.

+            if error < freq_tol:

+                correct_f0 += 1

+            elif min(wrapped, 12. - wrapped) < freq_tol:

+                correct_chroma += 1

+            else:

+                incorrect += 1

+    assert correct_sil + fp + missed + correct_f0 + correct_chroma + incorrect == total

+    assert unvoiced == correct_sil + fp

+    assert voiced == missed + correct_f0 + correct_chroma + incorrect

+    print(" ".join([

+        _format_scores(total, voiced, unvoiced, correct_sil, correct_f0, correct_chroma, missed, fp),

+        source_file,

+    ]))

+    total_correct_sil += correct_sil

+    total_correct_f0 += correct_f0

+    total_correct_chroma += correct_chroma

+    total_missed += missed

+    total_incorrect += incorrect

+    total_fp += fp

+    total_voiced += voiced

+    total_total += total

+# summary over all evaluated files; unvoiced total is correct_sil + fp

+print(" ".join([

+    _format_scores(total_total, total_voiced, total_correct_sil + total_fp,

+        total_correct_sil, total_correct_f0, total_correct_chroma, total_missed, total_fp),

+    "%d files," % len(sys.argv[1:]),

+    "total_length: %.2fs," % ((total_total * hop_s) / float(samplerate)),

+    "total runtime: %.2fs" % (time.time() - start),

+]))

--

⑨