ref: 397265f5791291defa76487c2388e1898e2e433c
dir: /src/cg/cst_mlpg.c/
/* --------------------------------------------------------------- */ /* The HMM-Based Speech Synthesis System (HTS): version 1.1.1 */ /* HTS Working Group */ /* */ /* Department of Computer Science */ /* Nagoya Institute of Technology */ /* and */ /* Interdisciplinary Graduate School of Science and Engineering */ /* Tokyo Institute of Technology */ /* Copyright (c) 2001-2003 */ /* All Rights Reserved. */ /* */ /* Permission is hereby granted, free of charge, to use and */ /* distribute this software and its documentation without */ /* restriction, including without limitation the rights to use, */ /* copy, modify, merge, publish, distribute, sublicense, and/or */ /* sell copies of this work, and to permit persons to whom this */ /* work is furnished to do so, subject to the following conditions: */ /* */ /* 1. The code must retain the above copyright notice, this list */ /* of conditions and the following disclaimer. */ /* */ /* 2. Any modifications must be clearly marked as such. */ /* */ /* NAGOYA INSTITUTE OF TECHNOLOGY, TOKYO INSITITUTE OF TECHNOLOGY, */ /* HTS WORKING GROUP, AND THE CONTRIBUTORS TO THIS WORK DISCLAIM */ /* ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL */ /* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */ /* SHALL NAGOYA INSTITUTE OF TECHNOLOGY, TOKYO INSITITUTE OF */ /* TECHNOLOGY, HTS WORKING GROUP, NOR THE CONTRIBUTORS BE LIABLE */ /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY */ /* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, */ /* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS */ /* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR */ /* PERFORMANCE OF THIS SOFTWARE. */ /* */ /* --------------------------------------------------------------- */ /* mlpg.c : speech parameter generation from pdf sequence */ /* */ /* 2003/12/26 by Heiga Zen */ /* --------------------------------------------------------------- */ /*********************************************************************/ /* */ /* Nagoya Institute of Technology, Aichi, Japan, */ /* and */ /* Carnegie Mellon University, Pittsburgh, PA */ /* Copyright (c) 2003-2004,2008 */ /* All Rights Reserved. */ /* */ /* Permission is hereby granted, free of charge, to use and */ /* distribute this software and its documentation without */ /* restriction, including without limitation the rights to use, */ /* copy, modify, merge, publish, distribute, sublicense, and/or */ /* sell copies of this work, and to permit persons to whom this */ /* work is furnished to do so, subject to the following conditions: */ /* */ /* 1. The code must retain the above copyright notice, this list */ /* of conditions and the following disclaimer. */ /* 2. Any modifications must be clearly marked as such. */ /* 3. Original authors' names are not deleted. */ /* */ /* NAGOYA INSTITUTE OF TECHNOLOGY, CARNEGIE MELLON UNIVERSITY, AND */ /* THE CONTRIBUTORS TO THIS WORK DISCLAIM ALL WARRANTIES WITH */ /* REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF */ /* MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL NAGOYA INSTITUTE */ /* OF TECHNOLOGY, CARNEGIE MELLON UNIVERSITY, NOR THE CONTRIBUTORS */ /* BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR */ /* ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR */ /* PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER */ /* TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE */ /* OR PERFORMANCE OF THIS SOFTWARE. */ /* */ /*********************************************************************/ /* */ /* Author : Tomoki Toda (tomoki@ics.nitech.ac.jp) */ /* Date : June 2004 */ /* */ /* Modified as a single file for inclusion in festival/flite */ /* May 2008 awb@cs.cmu.edu */ /*-------------------------------------------------------------------*/ /* */ /* ML-Based Parameter Generation */ /* */ /*-------------------------------------------------------------------*/ #include "cst_alloc.h" #include "cst_string.h" #include "cst_math.h" #include "cst_track.h" #include "cst_wave.h" #include "cst_vc.h" #include "cst_mlpg.h" #define mlpg_alloc(X,Y) (cst_alloc(Y,X)) #define mlpg_free cst_free static MLPGPARA xmlpgpara_init(int dim, int dim2, int dnum, int clsnum) { MLPGPARA param; /* memory allocation */ param = mlpg_alloc(1,struct MLPGPARA_STRUCT); param->ov = xdvalloc(dim); param->iuv = NODATA; param->iumv = NODATA; param->flkv = xdvalloc(dnum); param->stm = NODATA; param->dltm = xdmalloc(dnum, dim2); param->pdf = NODATA; param->detvec = NODATA; param->wght = xdmalloc(clsnum, 1); param->mean = xdmalloc(clsnum, dim); param->cov = NODATA; param->clsidxv = NODATA; /* dia_flag */ param->clsdetv = xdvalloc(1); param->clscov = xdmalloc(1, dim); param->vdet = 1.0; param->vm = NODATA; param->vv = NODATA; param->var = NODATA; return param; } static void xmlpgparafree(MLPGPARA param) { if (param != NODATA) { if (param->ov != NODATA) xdvfree(param->ov); if (param->iuv != NODATA) xdvfree(param->iuv); if (param->iumv != NODATA) xdvfree(param->iumv); if (param->flkv != NODATA) xdvfree(param->flkv); if (param->stm != NODATA) xdmfree(param->stm); if (param->dltm != NODATA) xdmfree(param->dltm); if (param->pdf != NODATA) xdmfree(param->pdf); if (param->detvec != NODATA) xdvfree(param->detvec); if (param->wght != NODATA) xdmfree(param->wght); if (param->mean != NODATA) xdmfree(param->mean); if (param->cov != NODATA) xdmfree(param->cov); if (param->clsidxv != NODATA) xlvfree(param->clsidxv); if (param->clsdetv != NODATA) xdvfree(param->clsdetv); if (param->clscov != NODATA) xdmfree(param->clscov); if (param->vm != NODATA) xdvfree(param->vm); if (param->vv != NODATA) xdvfree(param->vv); if (param->var != NODATA) xdvfree(param->var); mlpg_free(param); } return; } static double get_like_pdfseq_vit(int dim, int dim2, int dnum, int clsnum, MLPGPARA param, float **model, XBOOL dia_flag) { long d, c, k, l, j; double sumgauss; double like = 0.0; for (d = 0, like = 0.0; d < dnum; d++) { /* read weight and mean sequences */ param->wght->data[0][0] = 0.9; /* FIXME weights */ for (j=0; j<dim; j++) param->mean->data[0][j] = model[d][(j+1)*2]; /* observation vector */ for (k = 0; k < dim2; k++) { param->ov->data[k] = param->stm->data[d][k]; param->ov->data[k + dim2] = param->dltm->data[d][k]; } /* mixture index */ c = d; param->clsdetv->data[0] = param->detvec->data[c]; /* calculating likelihood */ if (dia_flag == XTRUE) { for (k = 0; k < param->clscov->col; k++) param->clscov->data[0][k] = param->cov->data[c][k]; sumgauss = get_gauss_dia(0, param->ov, param->clsdetv, param->wght, param->mean, param->clscov); } else { for (k = 0; k < param->clscov->row; k++) for (l = 0; l < param->clscov->col; l++) param->clscov->data[k][l] = param->cov->data[k + param->clscov->row * c][l]; sumgauss = get_gauss_full(0, param->ov, param->clsdetv, param->wght, param->mean, param->clscov); } if (sumgauss <= 0.0) param->flkv->data[d] = -1.0 * INFTY2; else param->flkv->data[d] = log(sumgauss); like += param->flkv->data[d]; /* estimating U', U'*M */ if (dia_flag == XTRUE) { /* PDF [U'*M U'] */ for (k = 0; k < dim; k++) { param->pdf->data[d][k] = param->clscov->data[0][k] * param->mean->data[0][k]; param->pdf->data[d][k + dim] = param->clscov->data[0][k]; } } else { /* PDF [U'*M U'] */ for (k = 0; k < dim; k++) { param->pdf->data[d][k] = 0.0; for (l = 0; l < dim; l++) { param->pdf->data[d][k * dim + dim + l] = param->clscov->data[k][l]; param->pdf->data[d][k] += param->clscov->data[k][l] * param->mean->data[0][l]; } } } } like /= (double)dnum; return like; } #if 0 static double get_like_gv(long dim2, long dnum, MLPGPARA param) { long k; double av = 0.0, dif = 0.0; double vlike = -INFTY; if (param->vm != NODATA && param->vv != NODATA) { for (k = 0; k < dim2; k++) calc_varstats(param->stm->data, k, dnum, &av, &(param->var->data[k]), &dif); vlike = log(get_gauss_dia5(param->vdet, 1.0, param->var, param->vm, param->vv)); } return vlike; } static void sm_mvav(DMATRIX mat, long hlen) { long k, l, m, p; double d, sd; DVECTOR vec = NODATA; DVECTOR win = NODATA; vec = xdvalloc(mat->row); /* smoothing window */ win = xdvalloc(hlen * 2 + 1); for (k = 0, d = 1.0, sd = 0.0; k < hlen; k++, d += 1.0) { win->data[k] = d; win->data[win->length - k - 1] = d; sd += d + d; } win->data[k] = d; sd += d; for (k = 0; k < win->length; k++) win->data[k] /= sd; for (l = 0; l < mat->col; l++) { for (k = 0; k < mat->row; k++) { for (m = 0, vec->data[k] = 0.0; m < win->length; m++) { p = k - hlen + m; if (p >= 0 && p < mat->row) vec->data[k] += mat->data[p][l] * win->data[m]; } } for (k = 0; k < mat->row; k++) mat->data[k][l] = vec->data[k]; } xdvfree(win); xdvfree(vec); return; } #endif static void get_dltmat(DMATRIX mat, DWin *dw, int dno, DMATRIX dmat) { int i, j, k, tmpnum; tmpnum = (int)mat->row - dw->width[dno][WRIGHT]; for (k = dw->width[dno][WRIGHT]; k < tmpnum; k++) /* time index */ for (i = 0; i < (int)mat->col; i++) /* dimension index */ for (j = dw->width[dno][WLEFT], dmat->data[k][i] = 0.0; j <= dw->width[dno][WRIGHT]; j++) dmat->data[k][i] += mat->data[k + j][i] * dw->coef[dno][j]; for (i = 0; i < (int)mat->col; i++) { /* dimension index */ for (k = 0; k < dw->width[dno][WRIGHT]; k++) /* time index */ for (j = dw->width[dno][WLEFT], dmat->data[k][i] = 0.0; j <= dw->width[dno][WRIGHT]; j++) if (k + j >= 0) dmat->data[k][i] += mat->data[k + j][i] * dw->coef[dno][j]; else dmat->data[k][i] += (2.0 * mat->data[0][i] - mat->data[-k - j][i]) * dw->coef[dno][j]; for (k = tmpnum; k < (int)mat->row; k++) /* time index */ for (j = dw->width[dno][WLEFT], dmat->data[k][i] = 0.0; j <= dw->width[dno][WRIGHT]; j++) if (k + j < (int)mat->row) dmat->data[k][i] += mat->data[k + j][i] * dw->coef[dno][j]; else dmat->data[k][i] += (2.0 * mat->data[mat->row - 1][i] - mat->data[mat->row - k - j + mat->row - 2][i]) * dw->coef[dno][j]; } return; } static double *dcalloc(int x, int xoff) { double *ptr; ptr = mlpg_alloc(x,double); /* ptr += xoff; */ /* Just not going to allow this */ return(ptr); } static double **ddcalloc(int x, int y, int xoff, int yoff) { double **ptr; register int i; ptr = mlpg_alloc(x,double *); for (i = 0; i < x; i++) ptr[i] = dcalloc(y, yoff); /* ptr += xoff; */ /* Just not going to allow this */ return(ptr); } /***********************************/ /* ML using Choleski decomposition */ /***********************************/ static void InitDWin(PStreamChol *pst, const float *dynwin, int fsize) { int i,j; int leng; pst->dw.num = 1; /* only static */ if (dynwin) { pst->dw.num = 2; /* static + dyn */ } /* memory allocation */ pst->dw.width = mlpg_alloc(pst->dw.num,int *); for (i = 0; i < pst->dw.num; i++) pst->dw.width[i] = mlpg_alloc(2,int); pst->dw.coef = mlpg_alloc(pst->dw.num, double *); pst->dw.coef_ptrs = mlpg_alloc(pst->dw.num, double *); /* window for static parameter WLEFT = 0, WRIGHT = 1 */ pst->dw.width[0][WLEFT] = pst->dw.width[0][WRIGHT] = 0; pst->dw.coef_ptrs[0] = mlpg_alloc(1,double); pst->dw.coef[0] = pst->dw.coef_ptrs[0]; pst->dw.coef[0][0] = 1.0; /* set delta coefficients */ for (i = 1; i < pst->dw.num; i++) { pst->dw.coef_ptrs[i] = mlpg_alloc(fsize, double); pst->dw.coef[i] = pst->dw.coef_ptrs[i]; for (j=0; j<fsize; j++) /* FIXME make dynwin doubles for memmove */ pst->dw.coef[i][j] = (double)dynwin[j]; /* set pointer */ leng = fsize / 2; /* L (fsize = 2 * L + 1) */ pst->dw.coef[i] += leng; /* [L] -> [0] center */ pst->dw.width[i][WLEFT] = -leng; /* -L left */ pst->dw.width[i][WRIGHT] = leng; /* L right */ if (fsize % 2 == 0) pst->dw.width[i][WRIGHT]--; } pst->dw.maxw[WLEFT] = pst->dw.maxw[WRIGHT] = 0; for (i = 0; i < pst->dw.num; i++) { if (pst->dw.maxw[WLEFT] > pst->dw.width[i][WLEFT]) pst->dw.maxw[WLEFT] = pst->dw.width[i][WLEFT]; if (pst->dw.maxw[WRIGHT] < pst->dw.width[i][WRIGHT]) pst->dw.maxw[WRIGHT] = pst->dw.width[i][WRIGHT]; } return; } static void InitPStreamChol(PStreamChol *pst, const float *dynwin, int fsize, int order, int T) { /* order of cepstrum */ pst->order = order; /* windows for dynamic feature */ InitDWin(pst, dynwin, fsize); /* dimension of observed vector */ pst->vSize = (pst->order + 1) * pst->dw.num; /* odim = dim * (1--3) */ /* memory allocation */ pst->T = T; /* number of frames */ pst->width = pst->dw.maxw[WRIGHT] * 2 + 1; /* width of R */ pst->mseq = ddcalloc(T, pst->vSize, 0, 0); /* [T][odim] */ pst->ivseq = ddcalloc(T, pst->vSize, 0, 0); /* [T][odim] */ pst->R = ddcalloc(T, pst->width, 0, 0); /* [T][width] */ pst->r = dcalloc(T, 0); /* [T] */ pst->g = dcalloc(T, 0); /* [T] */ pst->c = ddcalloc(T, pst->order + 1, 0, 0); /* [T][dim] */ return; } static void mlgparaChol(DMATRIX pdf, PStreamChol *pst, DMATRIX mlgp) { int t, d; /* error check */ if (pst->vSize * 2 != pdf->col || pst->order + 1 != mlgp->col) { cst_errmsg("Error mlgparaChol: Different dimension\n"); cst_error(); } /* mseq: U^{-1}*M, ifvseq: U^{-1} */ for (t = 0; t < pst->T; t++) { for (d = 0; d < pst->vSize; d++) { pst->mseq[t][d] = pdf->data[t][d]; pst->ivseq[t][d] = pdf->data[t][pst->vSize + d]; } } /* ML parameter generation */ mlpgChol(pst); /* extracting parameters */ for (t = 0; t < pst->T; t++) for (d = 0; d <= pst->order; d++) mlgp->data[t][d] = pst->c[t][d]; return; } /* generate parameter sequence from pdf sequence using Choleski decomposition */ static void mlpgChol(PStreamChol *pst) { register int m; /* generating parameter in each dimension */ for (m = 0; m <= pst->order; m++) { calc_R_and_r(pst, m); Choleski(pst); Choleski_forward(pst); Choleski_backward(pst, m); } return; } /* parameter generation fuctions */ /* calc_R_and_r: calculate R = W'U^{-1}W and r = W'U^{-1}M */ static void calc_R_and_r(PStreamChol *pst, const int m) { register int i, j, k, l, n; double wu; for (i = 0; i < pst->T; i++) { pst->r[i] = pst->mseq[i][m]; pst->R[i][0] = pst->ivseq[i][m]; for (j = 1; j < pst->width; j++) pst->R[i][j] = 0.0; for (j = 1; j < pst->dw.num; j++) { for (k = pst->dw.width[j][0]; k <= pst->dw.width[j][1]; k++) { n = i + k; if (n >= 0 && n < pst->T && pst->dw.coef[j][-k] != 0.0) { l = j * (pst->order + 1) + m; pst->r[i] += pst->dw.coef[j][-k] * pst->mseq[n][l]; wu = pst->dw.coef[j][-k] * pst->ivseq[n][l]; for (l = 0; l < pst->width; l++) { n = l-k; if (n <= pst->dw.width[j][1] && i + l < pst->T && pst->dw.coef[j][n] != 0.0) pst->R[i][l] += wu * pst->dw.coef[j][n]; } } } } } return; } /* Choleski: Choleski factorization of Matrix R */ static void Choleski(PStreamChol *pst) { register int t, j, k; pst->R[0][0] = sqrt(pst->R[0][0]); for (j = 1; j < pst->width; j++) pst->R[0][j] /= pst->R[0][0]; for (t = 1; t < pst->T; t++) { for (j = 1; j < pst->width; j++) if (t - j >= 0) pst->R[t][0] -= pst->R[t - j][j] * pst->R[t - j][j]; pst->R[t][0] = sqrt(pst->R[t][0]); for (j = 1; j < pst->width; j++) { for (k = 0; k < pst->dw.maxw[WRIGHT]; k++) if (j != pst->width - 1) pst->R[t][j] -= pst->R[t - k - 1][j - k] * pst->R[t - k - 1][j + 1]; pst->R[t][j] /= pst->R[t][0]; } } return; } /* Choleski_forward: forward substitution to solve linear equations */ static void Choleski_forward(PStreamChol *pst) { register int t, j; double hold; pst->g[0] = pst->r[0] / pst->R[0][0]; for (t=1; t < pst->T; t++) { hold = 0.0; for (j = 1; j < pst->width; j++) if (t - j >= 0 && pst->R[t - j][j] != 0.0) hold += pst->R[t - j][j] * pst->g[t - j]; pst->g[t] = (pst->r[t] - hold) / pst->R[t][0]; } return; } /* Choleski_backward: backward substitution to solve linear equations */ static void Choleski_backward(PStreamChol *pst, const int m) { register int t, j; double hold; pst->c[pst->T - 1][m] = pst->g[pst->T - 1] / pst->R[pst->T - 1][0]; for (t = pst->T - 2; t >= 0; t--) { hold = 0.0; for (j = 1; j < pst->width; j++) if (t + j < pst->T && pst->R[t][j] != 0.0) hold += pst->R[t][j] * pst->c[t + j][m]; pst->c[t][m] = (pst->g[t] - hold) / pst->R[t][0]; } return; } /* ML Considering Global Variance */ #if 0 static void varconv(double **c, const int m, const int T, const double var) { register int n; double sd, osd; double oav = 0.0, ovar = 0.0, odif = 0.0; calc_varstats(c, m, T, &oav, &ovar, &odif); osd = sqrt(ovar); sd = sqrt(var); for (n = 0; n < T; n++) c[n][m] = (c[n][m] - oav) / osd * sd + oav; return; } static void calc_varstats(double **c, const int m, const int T, double *av, double *var, double *dif) { register int i; register double d; *av = 0.0; *var = 0.0; *dif = 0.0; for (i = 0; i < T; i++) *av += c[i][m]; *av /= (double)T; for (i = 0; i < T; i++) { d = c[i][m] - *av; *var += d * d; *dif += d; } *var /= (double)T; return; } /* Diagonal Covariance Version */ static void mlgparaGrad(DMATRIX pdf, PStreamChol *pst, DMATRIX mlgp, const int max, double th, double e, double alpha, DVECTOR vm, DVECTOR vv, XBOOL nrmflag, XBOOL extvflag) { int t, d; /* error check */ if (pst->vSize * 2 != pdf->col || pst->order + 1 != mlgp->col) { cst_errmsg("Error mlgparaChol: Different dimension\n"); cst_error(); } /* mseq: U^{-1}*M, ifvseq: U^{-1} */ for (t = 0; t < pst->T; t++) { for (d = 0; d < pst->vSize; d++) { pst->mseq[t][d] = pdf->data[t][d]; pst->ivseq[t][d] = pdf->data[t][pst->vSize + d]; } } /* ML parameter generation */ mlpgChol(pst); /* extend variance */ if (extvflag == XTRUE) for (d = 0; d <= pst->order; d++) varconv(pst->c, d, pst->T, vm->data[d]); /* estimating parameters */ mlpgGrad(pst, max, th, e, alpha, vm, vv, nrmflag); /* extracting parameters */ for (t = 0; t < pst->T; t++) for (d = 0; d <= pst->order; d++) mlgp->data[t][d] = pst->c[t][d]; return; } /* generate parameter sequence from pdf sequence using gradient */ static void mlpgGrad(PStreamChol *pst, const int max, double th, double e, double alpha, DVECTOR vm, DVECTOR vv, XBOOL nrmflag) { register int m, i, t; double diff, n, dth; if (nrmflag == XTRUE) n = (double)(pst->T * pst->vSize) / (double)(vm->length); else n = 1.0; /* generating parameter in each dimension */ for (m = 0; m <= pst->order; m++) { calc_R_and_r(pst, m); dth = th * sqrt(vm->data[m]); for (i = 0; i < max; i++) { calc_grad(pst, m); if (vm != NODATA && vv != NODATA) calc_vargrad(pst, m, alpha, n, vm->data[m], vv->data[m]); for (t = 0, diff = 0.0; t < pst->T; t++) { diff += pst->g[t] * pst->g[t]; pst->c[t][m] += e * pst->g[t]; } diff = sqrt(diff / (double)pst->T); if (diff < dth || diff == 0.0) break; } } return; } /* calc_grad: calculate -RX + r = -W'U^{-1}W * X + W'U^{-1}M */ void calc_grad(PStreamChol *pst, const int m) { register int i, j; for (i = 0; i < pst->T; i++) { pst->g[i] = pst->r[i] - pst->c[i][m] * pst->R[i][0]; for (j = 1; j < pst->width; j++) { if (i + j < pst->T) pst->g[i] -= pst->c[i + j][m] * pst->R[i][j]; if (i - j >= 0) pst->g[i] -= pst->c[i - j][m] * pst->R[i - j][j]; } } return; } static void calc_vargrad(PStreamChol *pst, const int m, double alpha, double n, double vm, double vv) { register int i; double vg, w1, w2; double av = 0.0, var = 0.0, dif = 0.0; if (alpha > 1.0 || alpha < 0.0) { w1 = 1.0; w2 = 1.0; } else { w1 = alpha; w2 = 1.0 - alpha; } calc_varstats(pst->c, m, pst->T, &av, &var, &dif); for (i = 0; i < pst->T; i++) { vg = -(var - vm) * (pst->c[i][m] - av) * vv * 2.0 / (double)pst->T; pst->g[i] = w1 * pst->g[i] / n + w2 * vg; } return; } #endif /* diagonal covariance */ static DVECTOR xget_detvec_diamat2inv(DMATRIX covmat) /* [num class][dim] */ { long dim, clsnum; long i, j; double det; DVECTOR detvec = NODATA; /* In cases where the determinant of the matrix ends up being */ /* zero, we can fix this by artificially setting the covariance */ /* to be a magic number. I chose this magic number by computing */ /* the mean of all MCEP SDs of all leaves in a standard RMS */ /* voice. - Prasanna */ /* double magic_covariance = 0.0962*0.0962; */ double magic_covariance = 0.0962; clsnum = covmat->row; dim = covmat->col; /* memory allocation */ detvec = xdvalloc(clsnum); for (i = 0; i < clsnum; i++) { for (j = 0, det = 1.0; j < dim; j++) { det *= covmat->data[i][j]; if (det > 0.0) { covmat->data[i][j] = 1.0 / covmat->data[i][j]; } else { covmat->data[i][j] = 1.0 / magic_covariance; det = pow(magic_covariance, j+1); /* cst_errmsg("error:(class %ld) determinant <= 0, det = %f\n", i, det); */ } } detvec->data[i] = det; } return detvec; } static double get_gauss_full(long clsidx, DVECTOR vec, /* [dim] */ DVECTOR detvec, /* [clsnum] */ DMATRIX weightmat, /* [clsnum][1] */ DMATRIX meanvec, /* [clsnum][dim] */ DMATRIX invcovmat) /* [clsnum * dim][dim] */ { double gauss; if (detvec->data[clsidx] <= 0.0) { cst_errmsg("#error: det <= 0.0\n"); cst_error(); } gauss = weightmat->data[clsidx][0] / sqrt(pow(2.0 * PI, (double)vec->length) * detvec->data[clsidx]) * exp(-1.0 * cal_xmcxmc(clsidx, vec, meanvec, invcovmat) / 2.0); return gauss; } static double cal_xmcxmc(long clsidx, DVECTOR x, DMATRIX mm, /* [num class][dim] */ DMATRIX cm) /* [num class * dim][dim] */ { long clsnum, k, l, b, dim; double *vec = NULL; double td, d; dim = x->length; clsnum = mm->row; b = clsidx * dim; if (mm->col != dim || cm->col != dim || clsnum * dim != cm->row) { cst_errmsg("Error cal_xmcxmc: different dimension\n"); cst_error(); } /* memory allocation */ vec = mlpg_alloc((int)dim, double); for (k = 0; k < dim; k++) vec[k] = x->data[k] - mm->data[clsidx][k]; for (k = 0, d = 0.0; k < dim; k++) { for (l = 0, td = 0.0; l < dim; l++) td += vec[l] * cm->data[l + b][k]; d += td * vec[k]; } /* memory free */ mlpg_free(vec); vec = NULL; return d; } #if 0 /* diagonal covariance */ static double get_gauss_dia5(double det, double weight, DVECTOR vec, /* dim */ DVECTOR meanvec, /* dim */ DVECTOR invcovvec) /* dim */ { double gauss, sb; long k; if (det <= 0.0) { cst_errmsg("#error: det <= 0.0\n"); cst_error(); } for (k = 0, gauss = 0.0; k < vec->length; k++) { sb = vec->data[k] - meanvec->data[k]; gauss += sb * invcovvec->data[k] * sb; } gauss = weight / sqrt(pow(2.0 * PI, (double)vec->length) * det) * exp(-gauss / 2.0); return gauss; } #endif static double get_gauss_dia(long clsidx, DVECTOR vec, /* [dim] */ DVECTOR detvec, /* [clsnum] */ DMATRIX weightmat, /* [clsnum][1] */ DMATRIX meanmat, /* [clsnum][dim] */ DMATRIX invcovmat) /* [clsnum][dim] */ { double gauss, sb; long k; if (detvec->data[clsidx] <= 0.0) { cst_errmsg("#error: det <= 0.0\n"); cst_error(); } for (k = 0, gauss = 0.0; k < vec->length; k++) { sb = vec->data[k] - meanmat->data[clsidx][k]; gauss += sb * invcovmat->data[clsidx][k] * sb; } gauss = weightmat->data[clsidx][0] / sqrt(pow(2.0 * PI, (double)vec->length) * detvec->data[clsidx]) * exp(-gauss / 2.0); return gauss; } static void pst_free(PStreamChol *pst) { int i; for (i=0; i<pst->dw.num; i++) mlpg_free(pst->dw.width[i]); mlpg_free(pst->dw.width); pst->dw.width = NULL; for (i=0; i<pst->dw.num; i++) mlpg_free(pst->dw.coef_ptrs[i]); mlpg_free(pst->dw.coef); pst->dw.coef = NULL; mlpg_free(pst->dw.coef_ptrs); pst->dw.coef_ptrs = NULL; for (i=0; i<pst->T; i++) mlpg_free(pst->mseq[i]); mlpg_free(pst->mseq); for (i=0; i<pst->T; i++) mlpg_free(pst->ivseq[i]); mlpg_free(pst->ivseq); for (i=0; i<pst->T; i++) mlpg_free(pst->R[i]); mlpg_free(pst->R); mlpg_free(pst->r); mlpg_free(pst->g); for (i=0; i<pst->T; i++) mlpg_free(pst->c[i]); mlpg_free(pst->c); return; } cst_track *mlpg(const cst_track *param_track, cst_cg_db *cg_db) { /* Generate an (mcep) track using Maximum Likelihood Parameter Generation */ MLPGPARA param = NODATA; cst_track *out; int dim, dim_st; /* float like; */ int i,j; int nframes; PStreamChol pst; nframes = param_track->num_frames; dim = (param_track->num_channels/2)-1; dim_st = dim/2; /* dim2 in original code */ out = new_track(); cst_track_resize(out,nframes,dim_st+1); param = xmlpgpara_init(dim,dim_st,nframes,nframes); /* mixture-index sequence */ param->clsidxv = xlvalloc(nframes); for (i=0; i<nframes; i++) param->clsidxv->data[i] = i; /* initial static feature sequence */ param->stm = xdmalloc(nframes,dim_st); for (i=0; i<nframes; i++) { for (j=0; j<dim_st; j++) param->stm->data[i][j] = param_track->frames[i][(j+1)*2]; } /* Load cluster means */ for (i=0; i<nframes; i++) for (j=0; j<dim_st; j++) param->mean->data[i][j] = param_track->frames[i][(j+1)*2]; /* GMM parameters diagonal covariance */ InitPStreamChol(&pst, cg_db->dynwin, cg_db->dynwinsize, dim_st-1, nframes); param->pdf = xdmalloc(nframes,dim*2); param->cov = xdmalloc(nframes,dim); for (i=0; i<nframes; i++) for (j=0; j<dim; j++) param->cov->data[i][j] = param_track->frames[i][(j+1)*2+1] * param_track->frames[i][(j+1)*2+1]; param->detvec = xget_detvec_diamat2inv(param->cov); /* global variance parameters */ /* TBD get_gv_mlpgpara(param, vmfile, vvfile, dim2, msg_flag); */ get_dltmat(param->stm, &pst.dw, 1, param->dltm); get_like_pdfseq_vit(dim, dim_st, nframes, nframes, param, param_track->frames, XTRUE); /* vlike = get_like_gv(dim2, dnum, param); */ mlgparaChol(param->pdf, &pst, param->stm); /* Put the answer back into the output track */ for (i=0; i<nframes; i++) { out->times[i] = param_track->times[i]; out->frames[i][0] = param_track->frames[i][0]; /* F0 */ for (j=0; j<dim_st; j++) out->frames[i][j+1] = param->stm->data[i][j]; } /* memory free */ xmlpgparafree(param); pst_free(&pst); return out; }