Source code for phonlab.acoustic.rlpc

import numpy as np
from librosa import util, lpc, frames_to_time
from scipy.signal import windows, fftconvolve, lfilter
from scipy.linalg import inv
from pandas import DataFrame
from numba import jit

# prep_audio, choose_order
from ..utils.prep_audio_ import prep_audio
from ..acoustic.choose_order_ import choose_order

@jit(nopython=True)
def weighted_cov(x,weight,npoles):
    # weighted covariance matrix, down-weighting samples with high LPC error
    C1 = np.zeros((npoles+1,npoles+1))
    for i in range(1,npoles+1):
        for j in range(1,npoles+1):
            for n in range(npoles+1,len(x)):
                C1[i][j] += x[n-i] * x[n-j] * weight[n]
    return C1[1:npoles+1,1:npoles+1]

@jit(nopython=True)
def weighted_cor(x,weight,npoles):
    # weighted correlation matrix, down-weighting samples with high LPC error
    c1 = np.zeros((npoles+1))
    for j in range(1,npoles+1):
        for n in range(npoles+1,len(x)):
            c1[j] += x[n] * x[n-j] * weight[n]
    return c1[1:npoles+1]
    
[docs] def RLPC_formants(x, fs, tf = 6000, preemphasis = 0.94,order = -1, base = "BIC", s = 0.01, l = 0.03, window = None, numiter=1, verbose=False): """An implementation of Lee's (1988) method for Robust Linear Prediction Coding. The method computes the residual signal from a conventional LPC analysis and then uses it to down-weight samples that produce large prediction error. It is possible to iterate the residual reweighting process more than once, though not much improvement is gained after the first iteration. Formant frequencies are found from the LPC coefficients using polynomial root solving. A couple of functions called by `RLPC_formants()` are accelerated by the use of numba just in time (JIT) compilation into machine code. Therefore, the first call to `RLPC_formants()` will be slower (by about 10 ms) than subsequent calls. Note that unlike other formant analysis functions, this one returns both a dataframe with analysis results and also a 2D matrix with time-varying LPC coefficients. Parameters ========== x : ndarray a 1D array of audio samples fs : int the sampling rate of samples in x. This will be resampled to 12,000 Hz for analysis, so choose the order paramter with this in mind. tf : int (default = 6000) The top frequency of the analysis band. The input signal will be resampled to a rate of tf*2. preemphasis : float (default = 0.94) value of a high frequency preemphasis (spectral tilt) factor (0-1). order : int (default = -1) The LPC 'order' (i.e. the number of coefficients in the LPC filter. A value of -1 uses the function phon.choose_order() to determine the best value for this parameter. base : string (default = 'BIC') If `order` is -1, then use this base ('BIC' or 'coef') in phon.choose_order(). s : float (default = 0.01) the interval (in seconds) between successive formant measurements. l : float (default = 0.05) duration (in seconds) of analysis frames window : string (default = 'cosine') The name of a window shape for scipy.signal.windows.get_window() numiter : integer (default = 1) Number of robust LPC iterations to perform. If this value is 0 the results of the conventional LPC analysis are returned with no robust recalculation of the linear prediction. verbose : boolean (default = False) Print diagnostic messages Returns ======= df - a pandas dataframe with columns as described in the note below. A - a two-dimensional numpy matrix with the LPC coefficients for each analysis frame. Note ==== The columns in the output dataframe are: * sec - midpoint time of the frame * gain - the LPC gain (variance of the residual) for each frame. * F1-4 - the lowest four vowel 'formants' - vocal tract resonances. * BW1-4 - bandwidths of the vowel formants References ========== C.H. Lee (1988) On robust prediction of speech. `IEEE Transactions on Acoustics, Speech and Signal Processing`, 36,5: 642-650. """ target_fs = tf*2 x, fs = prep_audio(x, fs, target_fs = target_fs, pre = preemphasis, quiet = not verbose) frame_length = int(fs*l) step = int(fs*s) if order<0: # guess the correct LPC order order,time = choose_order(x,fs,l, base='BIC', verbose=verbose) if verbose: print(f"in RLPC_formants(), order is {order}, taken at time {time:.3f}") frames = util.frame(x, frame_length=frame_length, hop_length=step, axis=0) t = frames_to_time(range(frames.shape[0]),sr=fs, hop_length=step, n_fft=frame_length) resid = np.zeros_like(frames) window = window = 'boxcar' if window is None else window w = windows.get_window(window, Nx=frames.shape[1]) frames = frames * w A = lpc(frames, order=order) # get conventional lpc coefficients nb = A.shape[0] # the number of frames in the LPC analysis # begin robust LPC process for iter in range(numiter): # iterate this loop numiter times for i in range(nb): resid[i] = lfilter(A[i], [1.0], frames[i]) # prediction error sd = np.std(resid,axis=1) temp = np.clip(resid/sd[:,None],a_min=-1.5,a_max=1.5) # min/max clipping weight = (sd[:,None] * temp)/resid # down weight samples with high error for i in range(nb): # recompute A frame by frame C = weighted_cov(frames[i],weight[i],order) c = weighted_cor(frames[i],weight[i],order) A[i,1:order+1] = np.dot(-inv(C), c) # Now we have the refined LPC coefficients if (order-2)/2 < 4: nf = int((order-2)/2) # the number of formants that will be computed else: nf = 4 # the number of formants that will be returned formants = np.empty((nb,nf)) # arrays to be filled by analysis bandwidths = np.empty((nb,nf)) for i in range(nb): # find formants from LPC coefficients (A) using root solving roots = np.roots(A[i,:]) # solve for the roots of the polynomial roots = roots[np.imag(roots)>=0] # only keep the positive ones fr = np.angle(roots) * fs/(2 * np.pi) # calculate formant frequencies from them bw = -(fs/(np.pi)) * np.log(abs(roots)) # calculate bandwidths from them resid[i] = lfilter(A[i], [1.0], frames[i]) # prediction error # A wide bandwidth criterion (about 800 Hz) is needed to capture fast moving formants. fr[bw>800] = np.nan # reject formants with too wide bandwidth, NaN sorts to the end of the array s = np.argsort(fr) # get the sort order so you can apply it more than one array fr = fr[s] # sort the formants from lowest to highest bw = bw[s] # use the same sort order with the bandwidth array formants[i,:] = fr[:nf] bandwidths[i,:] = bw[:nf] gain = np.std(resid,axis=1,ddof=order)**2 list = ['sec',] + [f"F{i}" for i in range(1,nf+1)] + [f"BW{i}" for i in range(1,nf+1)] + ['gain'] df = DataFrame(np.concatenate((t.reshape(nb,1), formants, bandwidths, gain.reshape(nb,1)), axis=1),columns=(list)) return df,A