ref: ecd3c41205925e009b67962166b6f3b226a04a03
dir: /libfaac/ltp.c/
/************************************************************************** This software module was originally developed by Nokia in the course of development of the MPEG-2 AAC/MPEG-4 Audio standard ISO/IEC13818-7, 14496-1, 2 and 3. This software module is an implementation of a part of one or more MPEG-2 AAC/MPEG-4 Audio tools as specified by the MPEG-2 aac/MPEG-4 Audio standard. ISO/IEC gives users of the MPEG-2aac/MPEG-4 Audio standards free license to this software module or modifications thereof for use in hardware or software products claiming conformance to the MPEG-2 aac/MPEG-4 Audio standards. Those intending to use this software module in hardware or software products are advised that this use may infringe existing patents. The original developer of this software module, the subsequent editors and their companies, and ISO/IEC have no liability for use of this software module or modifications thereof in an implementation. Copyright is not released for non MPEG-2 aac/MPEG-4 Audio conforming products. The original developer retains full right to use the code for the developer's own purpose, assign or donate the code to a third party and to inhibit third party from using the code for non MPEG-2 aac/MPEG-4 Audio conforming products. This copyright notice must be included in all copies or derivative works. Copyright (c)1997. ***************************************************************************/ /* * $Id: ltp.c,v 1.10 2012/03/01 18:34:17 knik Exp $ */ #include <stdio.h> #include <math.h> #include "frame.h" #include "coder.h" #include "ltp.h" #include "tns.h" #include "filtbank.h" #include "util.h" /* short double_to_int(double sig_in); */ #define double_to_int(sig_in) \ ((sig_in) > 32767 ? 32767 : ( \ (sig_in) < -32768 ? -32768 : (sig_in))) #define _MDCT_SCALE 512 /* Purpose: Codebook for LTP weight coefficients. */ static double codebook[CODESIZE] = { 0.570829, 0.696616, 0.813004, 0.911304, 0.984900, 1.067894, 1.194601, 1.369533 }; static double snr_pred(double *mdct_in, double *mdct_pred, int *sfb_flag, int *sfb_offset, int block_type, int side_info, int num_of_sfb) { int i, j, flen; double snr_limit; double num_bit, snr[NSFB_LONG]; double temp1, temp2; double energy[BLOCK_LEN_LONG], snr_p[BLOCK_LEN_LONG]; if (block_type != ONLY_SHORT_WINDOW) { flen = BLOCK_LEN_LONG; snr_limit = 1.e-30; } else { flen = BLOCK_LEN_SHORT; snr_limit = 1.e-20; } for (i = 0; i < flen; i++) { energy[i] = mdct_in[i] * mdct_in[i]; snr_p[i] = (mdct_in[i] - mdct_pred[i]) * (mdct_in[i] - mdct_pred[i]); } num_bit = 0.0; for (i = 0; i < num_of_sfb; i++) { temp1 = 0.0; temp2 = 0.0; for (j = sfb_offset[i]; j < sfb_offset[i + 1]; j++) { temp1 += energy[j]; temp2 += snr_p[j]; } if (temp2 < snr_limit) temp2 = snr_limit; if (temp1 > 1.e-20) snr[i] = -10. * log10 (temp2 / temp1); else snr[i] = 0.0; sfb_flag[i] = 1; if (block_type != ONLY_SHORT_WINDOW) { if (snr[i] <= 0.0) { sfb_flag[i] = 0; for (j = sfb_offset[i]; j < sfb_offset[i + 1]; j++) mdct_pred[j] = 0.0; } else { num_bit += snr[i] / 6. * (sfb_offset[i + 1] - sfb_offset[i]); } } } if (num_bit < side_info) { // printf("LTP not used!, num_bit: %f ", num_bit); num_bit = 0.0; for (j = 0; j < flen; j++) mdct_pred[j] = 0.0; for (i = 0; i < num_of_sfb; i++) sfb_flag[i] = 0; } else { num_bit -= side_info; // printf("LTP used!, num_bit: %f ", num_bit); } return (num_bit); } static void prediction(double *buffer, double *predicted_samples, double *weight, int lag, int flen) { int i, offset; int num_samples; offset = NOK_LT_BLEN - flen / 2 - lag; num_samples = flen; if(NOK_LT_BLEN - offset < flen) num_samples = NOK_LT_BLEN - offset; for(i = 0; i < num_samples; i++) predicted_samples[i] = *weight * _MDCT_SCALE*buffer[offset++]; for( ; i < flen; i++) predicted_samples[i] = 0.0; } static void w_quantize(double *freq, int *ltp_idx) { int i; double dist, low; low = 1.0e+10; dist = 0.0; for (i = 0; i < CODESIZE; i++) { dist = (*freq - codebook[i]) * (*freq - codebook[i]); if (dist < low) { low = dist; *ltp_idx = i; } } *freq = codebook[*ltp_idx]; } static int pitch(double *sb_samples, double *x_buffer, int flen, int lag0, int lag1, double *predicted_samples, double *gain, int *cb_idx) { int i, j, delay; double corr1, corr2, lag_corr; double p_max, energy, lag_energy; /* * Below is a figure illustrating how the lag and the * samples in the buffer relate to each other. * * ------------------------------------------------------------------ * | | | | | * | slot 1 | 2 | 3 | 4 | * | | | | | * ------------------------------------------------------------------ * * lag = 0 refers to the end of slot 4 and lag = DELAY refers to the end * of slot 2. The start of the predicted frame is then obtained by * adding the length of the frame to the lag. Remember that slot 4 doesn't * actually exist, since it is always filled with zeros. * * The above short explanation was for long blocks. For short blocks the * zero lag doesn't refer to the end of slot 4 but to the start of slot * 4 - the frame length of a short block. * * Some extra code is then needed to handle those lag values that refer * to slot 4. */ p_max = 0.0; lag_corr = lag_energy = 0.0; delay = lag0; for (i = lag0; i<lag1; i++) { energy = 0.0; corr1 = 0.0; for (j=0; j < flen; j++) { if (j < i+BLOCK_LEN_LONG) { corr1 += sb_samples[j] * _MDCT_SCALE * x_buffer[NOK_LT_BLEN - flen/2 - i + j]; energy += _MDCT_SCALE * x_buffer[NOK_LT_BLEN - flen/2 - i + j] * _MDCT_SCALE * x_buffer[NOK_LT_BLEN - flen/2 - i + j]; } } if (energy != 0.0) corr2 = corr1 / sqrt(energy); else corr2 = 0.0; if (p_max < corr2) { p_max = corr2; delay = i; lag_corr = corr1; lag_energy = energy; } } /* Compute the gain. */ if(lag_energy != 0.0) *gain = lag_corr / (1.010 * lag_energy); else *gain = 0.0; /* Quantize the gain. */ w_quantize(gain, cb_idx); // printf("Delay: %d, Coeff: %f", delay, *gain); /* Get the predicted signal. */ prediction(x_buffer, predicted_samples, gain, delay, flen); return (delay); } static double ltp_enc_tf(faacEncHandle hEncoder, CoderInfo *coderInfo, double *p_spectrum, double *predicted_samples, double *mdct_predicted, int *sfb_offset, int num_of_sfb, int last_band, int side_info, int *sfb_prediction_used, TnsInfo *tnsInfo) { double bit_gain; /* Transform prediction to frequency domain. */ FilterBank(hEncoder, coderInfo, predicted_samples, mdct_predicted, NULL, MNON_OVERLAPPED); /* Apply TNS analysis filter to the predicted spectrum. */ if(tnsInfo != NULL) TnsEncodeFilterOnly(tnsInfo, num_of_sfb, num_of_sfb, coderInfo->block_type, sfb_offset, mdct_predicted); /* Get the prediction gain. */ bit_gain = snr_pred(p_spectrum, mdct_predicted, sfb_prediction_used, sfb_offset, side_info, last_band, coderInfo->nr_of_sfb); return (bit_gain); } void LtpInit(faacEncStruct* hEncoder) { int i; unsigned int channel; for (channel = 0; channel < hEncoder->numChannels; channel++) { LtpInfo *ltpInfo = &(hEncoder->coderInfo[channel].ltpInfo); ltpInfo->buffer = AllocMemory(NOK_LT_BLEN * sizeof(double)); ltpInfo->mdct_predicted = AllocMemory(2*BLOCK_LEN_LONG*sizeof(double)); ltpInfo->time_buffer = AllocMemory(BLOCK_LEN_LONG*sizeof(double)); ltpInfo->ltp_overlap_buffer = AllocMemory(BLOCK_LEN_LONG*sizeof(double)); for (i = 0; i < NOK_LT_BLEN; i++) ltpInfo->buffer[i] = 0; ltpInfo->weight_idx = 0; for(i = 0; i < MAX_SHORT_WINDOWS; i++) ltpInfo->sbk_prediction_used[i] = ltpInfo->delay[i] = 0; for(i = 0; i < MAX_SCFAC_BANDS; i++) ltpInfo->sfb_prediction_used[i] = 0; ltpInfo->side_info = LEN_LTP_DATA_PRESENT; for(i = 0; i < 2 * BLOCK_LEN_LONG; i++) ltpInfo->mdct_predicted[i] = 0.0; } } void LtpEnd(faacEncStruct* hEncoder) { unsigned int channel; for (channel = 0; channel < hEncoder->numChannels; channel++) { LtpInfo *ltpInfo = &(hEncoder->coderInfo[channel].ltpInfo); if (ltpInfo->buffer) FreeMemory(ltpInfo->buffer); if (ltpInfo->mdct_predicted) FreeMemory(ltpInfo->mdct_predicted); if (ltpInfo->time_buffer) FreeMemory(ltpInfo->time_buffer); if (ltpInfo->ltp_overlap_buffer) FreeMemory(ltpInfo->ltp_overlap_buffer); } } int LtpEncode(faacEncHandle hEncoder, CoderInfo *coderInfo, LtpInfo *ltpInfo, TnsInfo *tnsInfo, double *p_spectrum, double *p_time_signal) { int i, last_band; double num_bit[MAX_SHORT_WINDOWS]; double *predicted_samples; ltpInfo->global_pred_flag = 0; ltpInfo->side_info = 0; predicted_samples = (double*)AllocMemory(2*BLOCK_LEN_LONG*sizeof(double)); switch(coderInfo->block_type) { case ONLY_LONG_WINDOW: case LONG_SHORT_WINDOW: case SHORT_LONG_WINDOW: last_band = (coderInfo->nr_of_sfb < MAX_LT_PRED_LONG_SFB) ? coderInfo->nr_of_sfb : MAX_LT_PRED_LONG_SFB; ltpInfo->delay[0] = pitch(p_time_signal, ltpInfo->buffer, 2 * BLOCK_LEN_LONG, 0, 2 * BLOCK_LEN_LONG, predicted_samples, <pInfo->weight, <pInfo->weight_idx); num_bit[0] = ltp_enc_tf(hEncoder, coderInfo, p_spectrum, predicted_samples, ltpInfo->mdct_predicted, coderInfo->sfb_offset, coderInfo->nr_of_sfb, last_band, ltpInfo->side_info, ltpInfo->sfb_prediction_used, tnsInfo); ltpInfo->global_pred_flag = (num_bit[0] == 0.0) ? 0 : 1; if(ltpInfo->global_pred_flag) for (i = 0; i < coderInfo->sfb_offset[last_band]; i++) p_spectrum[i] -= ltpInfo->mdct_predicted[i]; else ltpInfo->side_info = 1; break; default: break; } if (predicted_samples) FreeMemory(predicted_samples); return (ltpInfo->global_pred_flag); } void LtpReconstruct(CoderInfo *coderInfo, LtpInfo *ltpInfo, double *p_spectrum) { int i, last_band; if(ltpInfo->global_pred_flag) { switch(coderInfo->block_type) { case ONLY_LONG_WINDOW: case LONG_SHORT_WINDOW: case SHORT_LONG_WINDOW: last_band = (coderInfo->nr_of_sfb < MAX_LT_PRED_LONG_SFB) ? coderInfo->nr_of_sfb : MAX_LT_PRED_LONG_SFB; for (i = 0; i < coderInfo->sfb_offset[last_band]; i++) p_spectrum[i] += ltpInfo->mdct_predicted[i]; break; default: break; } } } void LtpUpdate(LtpInfo *ltpInfo, double *time_signal, double *overlap_signal, int block_size_long) { int i; for(i = 0; i < NOK_LT_BLEN - 2 * block_size_long; i++) ltpInfo->buffer[i] = ltpInfo->buffer[i + block_size_long]; for(i = 0; i < block_size_long; i++) { ltpInfo->buffer[NOK_LT_BLEN - 2 * block_size_long + i] = time_signal[i]; ltpInfo->buffer[NOK_LT_BLEN - block_size_long + i] = overlap_signal[i]; } }