Source code for phonlab.acoustic.lpc_residual

import numpy as np
from librosa import util, lpc
from scipy.signal import windows, lfilter

from ..utils.prep_audio_ import prep_audio
from numba import njit

[docs] @njit(cache=True) def overlap_add(frames, hop_length, window_vec): """ Perform Overlap-Add reconstruction from 2D frames to a 1D signal. Parameters ========== frames : ndarray A 2D array of shape (n_frames, frame_length) hop_length : int Number of samples to shift for each frame window_vec : 1D array (frame_length) use scipy.signal.windows to get this vector with a statement like `win = windows.get_window('cosine', Nx=frames.shape[1], fftbins=False)` Returns ======= y : ndarray The reconstructed 1D signal. """ n_frames, frame_length = frames.shape final_length = (n_frames - 1) * hop_length + frame_length y = np.zeros(final_length, dtype=np.float32) for i in range(n_frames): start = i * hop_length for j in range(frame_length): # Windowing and Adding performed in a single pass # to maximize CPU cache efficiency y[start + j] += frames[i, j] * window_vec[j] return y
[docs] def lpcresidual(y, fs, target_fs=16000, order = 18, l=0.04, s=0.005, window='cosine'): """Compute the residual signal which results from filtering the input array **y** using LPC inverse filtering. This signal is useful in voice quality and periodicity routines. The LPC order is equal to (target_fs/1000) + 2, which is by default is 18. Parameters ========== y : ndarray A one-dimensional array of audio samples fs : int Sampling rate of **y** target_fs: int, default = 16000 Algorithms from the covarep library of voice analysis routines require target_fs=16000 order: integer, default = 18 The "order" of the LPC analysis. The number of coefficients to use in the LPC analysis. The default value is that recommended by Drugman, Kane, and Gobl (2013) for voice quality analysis (fs/1000 + 2), with the caveat that a smaller number may be more appropriate for voices with higher fundamental frequency. l: float, default = 0.04 The duration of the LPC analysis window, 40 milliseconds s: float, default = 0.005 The interval between successive frames in the LPC analysis, 5 milliseconds window : str, tuple, numeric, callable, list-like (default 'hann') Window function applied to each frame. This parameter is passed as the `window` parameter to scipy.signal's `get_window` function. Returns ======= lpc_residual : ndarray A one-dimensional array -- the residual derived by inverse filtering the input audio signal. It has the same number of samples as the input **y** array. fs : int The sampling rate of **lpc_residual**. It will be the same as **target_fs**, which by default is 16000 Hz. """ # 1. Resample and Pre-process x, fs = prep_audio(y, fs, target_fs=target_fs, pre=0, quiet=True) x = x.astype(np.float32) # Use float32 for speed and memory efficiency frame_length = int(fs * l) step = int(fs * s) # 2. Frame the signal # util.frame creates a 'view', which is O(1) memory and time. frames = util.frame(x, frame_length=frame_length, hop_length=step, axis=0) # 3. Compute LPC Coefficients # librosa.lpc is generally efficient, we process frames in one go. A = lpc(frames, order=order) # 4. Inverse Filtering (The Bottleneck) # Pre-allocate output for frames inv = np.zeros_like(frames) # Applying the filter A[i] to frames[i] # A coefficients from librosa are [1, a1, a2, ...] # The residual is calculated by filtering the frame with A for i in range(frames.shape[0]): # lfilter is significantly faster than fftconvolve for short FIR filters inv[i] = lfilter(A[i], [1.0], frames[i]) # 5. Energy Normalization (Vectorized) # Compute energy per frame to match original logic inv = inv * np.sum(np.square(frames))/np.sum(np.square(inv)) #frame_energy = np.sum(frames**2, axis=1, keepdims=True) #inv_energy = np.sum(inv**2, axis=1, keepdims=True) # Avoid division by zero #scale = np.sqrt(frame_energy / (inv_energy + 1e-12)) #inv *= scale #print(f"scale.shape = {scale.shape}") # 6. Overlap-Add window = window = 'boxcar' if window is None else window w = windows.get_window(window, Nx=frames.shape[1], fftbins=False) lpc_resid = overlap_add(inv, step, w) # 7. Final Normalization and Padding #lpc_resid = lpc_resid/np.max(np.fabs(lpc_resid)) max_val = np.max(np.fabs(lpc_resid)) if max_val > 0: lpc_resid /= max_val # Pad to match original length if len(lpc_resid) < len(x): lpc_resid = np.pad(lpc_resid, (0, len(x) - len(lpc_resid)), mode='edge') else: lpc_resid = lpc_resid[:len(x)] return lpc_resid, fs