import numpy as np
from librosa import util, lpc
from scipy.signal import windows, lfilter
from ..utils.prep_audio_ import prep_audio
from numba import njit
[docs]
@njit(cache=True)
def overlap_add(frames, hop_length, window_vec):
"""
Perform Overlap-Add reconstruction from 2D frames to a 1D signal.
Parameters
==========
frames : ndarray
A 2D array of shape (n_frames, frame_length)
hop_length : int
Number of samples to shift for each frame
window_vec : 1D array (frame_length)
use scipy.signal.windows to get this vector with a statement like
`win = windows.get_window('cosine', Nx=frames.shape[1], fftbins=False)`
Returns
=======
y : ndarray
The reconstructed 1D signal.
"""
n_frames, frame_length = frames.shape
final_length = (n_frames - 1) * hop_length + frame_length
y = np.zeros(final_length, dtype=np.float32)
for i in range(n_frames):
start = i * hop_length
for j in range(frame_length):
# Windowing and Adding performed in a single pass
# to maximize CPU cache efficiency
y[start + j] += frames[i, j] * window_vec[j]
return y
[docs]
def lpcresidual(y, fs, target_fs=16000, order = 18, l=0.04, s=0.005, window='cosine'):
"""Compute the residual signal which results from filtering the input array **y**
using LPC inverse filtering. This signal is useful in voice quality and periodicity routines.
The LPC order is equal to (target_fs/1000) + 2, which is by default is 18.
Parameters
==========
y : ndarray
A one-dimensional array of audio samples
fs : int
Sampling rate of **y**
target_fs: int, default = 16000
Algorithms from the covarep library of voice analysis routines require target_fs=16000
order: integer, default = 18
The "order" of the LPC analysis. The number of coefficients to use in the LPC analysis.
The default value is that recommended by Drugman, Kane, and Gobl (2013) for voice quality
analysis (fs/1000 + 2), with the caveat that a smaller number may be more appropriate for
voices with higher fundamental frequency.
l: float, default = 0.04
The duration of the LPC analysis window, 40 milliseconds
s: float, default = 0.005
The interval between successive frames in the LPC analysis, 5 milliseconds
window : str, tuple, numeric, callable, list-like (default 'hann')
Window function applied to each frame. This parameter is passed as the
`window` parameter to scipy.signal's `get_window` function.
Returns
=======
lpc_residual : ndarray
A one-dimensional array -- the residual derived by inverse filtering the input
audio signal. It has the same number of samples as the input **y** array.
fs : int
The sampling rate of **lpc_residual**. It will be the same as **target_fs**, which
by default is 16000 Hz.
"""
# 1. Resample and Pre-process
x, fs = prep_audio(y, fs, target_fs=target_fs, pre=0, quiet=True)
x = x.astype(np.float32) # Use float32 for speed and memory efficiency
frame_length = int(fs * l)
step = int(fs * s)
# 2. Frame the signal
# util.frame creates a 'view', which is O(1) memory and time.
frames = util.frame(x, frame_length=frame_length, hop_length=step, axis=0)
# 3. Compute LPC Coefficients
# librosa.lpc is generally efficient, we process frames in one go.
A = lpc(frames, order=order)
# 4. Inverse Filtering (The Bottleneck)
# Pre-allocate output for frames
inv = np.zeros_like(frames)
# Applying the filter A[i] to frames[i]
# A coefficients from librosa are [1, a1, a2, ...]
# The residual is calculated by filtering the frame with A
for i in range(frames.shape[0]):
# lfilter is significantly faster than fftconvolve for short FIR filters
inv[i] = lfilter(A[i], [1.0], frames[i])
# 5. Energy Normalization (Vectorized)
# Compute energy per frame to match original logic
inv = inv * np.sum(np.square(frames))/np.sum(np.square(inv))
#frame_energy = np.sum(frames**2, axis=1, keepdims=True)
#inv_energy = np.sum(inv**2, axis=1, keepdims=True)
# Avoid division by zero
#scale = np.sqrt(frame_energy / (inv_energy + 1e-12))
#inv *= scale
#print(f"scale.shape = {scale.shape}")
# 6. Overlap-Add
window = window = 'boxcar' if window is None else window
w = windows.get_window(window, Nx=frames.shape[1], fftbins=False)
lpc_resid = overlap_add(inv, step, w)
# 7. Final Normalization and Padding
#lpc_resid = lpc_resid/np.max(np.fabs(lpc_resid))
max_val = np.max(np.fabs(lpc_resid))
if max_val > 0:
lpc_resid /= max_val
# Pad to match original length
if len(lpc_resid) < len(x):
lpc_resid = np.pad(lpc_resid, (0, len(x) - len(lpc_resid)), mode='edge')
else:
lpc_resid = lpc_resid[:len(x)]
return lpc_resid, fs