from ..utils.prep_audio_ import prep_audio
import numpy as np
from librosa import util
from numpy.fft import rfft, irfft
from pandas import DataFrame
import matplotlib.pyplot as plt
[docs]
def HNR(x,fs, f0_range = [64,400], l= 0.06, s=0.005, target_time = None):
'''Compute the Harmonics-to-Noise Ratio using the Cepstrum-Based method given by de Krom (1993).
The matlab implementation by Yen-Liang Shue (2009) which is a part of the VoiceSauce collection of
software provided some valuable pointers in how to implement de Krom's method. One difference between
the Shue implementation and this one is that here we use a fixed window size (specified with the input
parameter **l** and by default 60 ms) where Shue's algorithm required that F0 estimates be provided as
input and then HNR is calculated over windows of variable length, always long enough for 5 pitch periods
according to the estimated F0 for that location in the audio.
de Krom's (1993) method uses cepstral analysis to filter out the harmonic component of the spectrum,
allowing the separate calculation of harmonic and non-harmonic energy.
Parameters
==========
x : ndarray
A one-dimensional array of audio samples
fs : int
Sampling rate of **x**
f0_range : an array of two numbers, default=[64,400]
The expected range for f0.
l : float, default = 0.06
Length of analysis windows. The default is 60 milliseconds.
s : float, default = 0.005
Step size, of hops between analysis windows. The default is 5 milliseconds.
target_time : float, default = None
If a time value (in seconds) is given, diagnostic matplotlib plots of the spectrum and cepstrum
will be produced.
Returns
=======
df: pandas DataFrame
measurements at (by default) 5 msec intervals.
Note
====
The columns in the returned dataframe are for each frame of audio:
* sec - time at the midpoint of each frame in seconds
* f0 - estimate of the fundamental frequency in Hz
* hnr_500 - The harmonics-to-noise ratio in the spectrum below 500Hz
* hnr_1500 - The harmonics-to-noise ratio in the spectrum below 1500Hz
* hnr_2500 - The harmonics-to-noise ratio in the spectrum below 2500Hz
References
==========
G. de Krom (1993) A Cepstrum-Based Technique for Determining a Harmonics-to-Noise Ratio in Speech Signals. `Jounral of Speech and Hearing Research` 36,254-266.
Example
=======
.. code-block:: Python
example_file = importlib.resources.files('phonlab') / 'data/example_audio/sf3_cln.wav'
x,fs = phon.loadsig(example_file,chansel=[0])
df = HNR(x,fs, target_time=2.1) # get diagnostic plots for the spectrum at time 2.1 seconds.
This example shows diagnostic plots of the spectrum (top panel) and cepstrum (bottom panel) of a frame of audio
at time 2.1 seconds in the file 'sf3_cln.wav'. In the top panel we see the log magnitude spectrum in black, the noise component of the spectrum in orange, and the harmonic component in light blue. In the bottom panel we see the cepstrum of the log magnitude spectrum. The "Rahmonics" in black are removed (set to zero) giving the "liftered" cepstrum (in pink). The FFT of this liftered cepstrum gives the noise spectrum (well, after some level correction).
.. figure:: images/HNR.png
:scale: 50 %
:alt: Diagnostic plots of the spectrum (top panel) and cepstrum (bottom panel) of a frame of audio.
:align: center
'''
x,fs = prep_audio(x,fs,target_fs=16000,quiet=True)
frame_length = int(l*fs)
step = int(s*fs)
half_frame = frame_length//2
frame_length = half_frame * 2 + 1 # odd number in frame
#NFFT = int(2**(np.ceil(np.log(frame_length)/np.log(2)))*2)
NFFT = frame_length
frames = util.frame(x,frame_length=frame_length, hop_length=step,axis=0)
nb = frames.shape[0] # number of frames
ts = (np.array(range(nb)) * step + half_frame)/fs # time axis for output
w = np.blackman(frame_length)
S = 10 * np.log10(np.abs(rfft(w*frames,NFFT)))
C = np.real(irfft(S,NFFT)) # spectrum of the spectrum = cepstrum
if f0_range[0]<f0_range[1]:
f0_range = np.flip(f0_range) # from [60,400] -> [400,60]
T0_range = np.round(fs/np.array(f0_range)).astype(np.int32)
T0 = np.argmax(C[:,T0_range[0]:T0_range[1]],axis=-1) + T0_range[0] # F0 period
F0 = fs/T0
half_combwidth = 5
C_cl = C.copy()
maxindex = C_cl.shape[1]-1
for j in range(nb): # lifter out the harmonics
for k in range(1,4):
il = T0[j]*k - (half_combwidth+1)
ir = T0[j]*k + half_combwidth
if ir>maxindex: break
C_cl[j,il:ir] = 0
# -------- get noise spectrum from liftered cepstrum
Nap = np.real(rfft(C_cl[:,:NFFT//2], NFFT))
Ha = S - Nap # estimate Harmonic spectrum
# ------ Baseline correction ---------
N = Nap
for j in range(nb):
Hdelta = F0[j]/fs * NFFT
for f in np.arange(Hdelta+0.0001,NFFT//2-1,Hdelta):
fend = round(f)
fstart = np.ceil(f-Hdelta).astype(np.int32)
N[j,fstart:fend] += np.min(Ha[j,fstart:fend])
H = S - N # harmonic spectrogram
N = S - H # noise spectrogram
i500 = int(500/fs * NFFT) # index at 500 Hz
i1500 = int(1500/fs * NFFT)
i2500 = int(2500/fs * NFFT)
HNR500 = np.mean(H[:,:i500],axis=-1) - np.mean(N[:,:i500],axis=-1)
HNR1500 = np.mean(H[:,:i1500],axis=-1) - np.mean(N[:,:i1500],axis=-1)
HNR2500 = np.mean(H[:,:i2500],axis=-1) - np.mean(N[:,:i2500],axis=-1)
# ------------- diagnostic plot ---------------
if target_time != None:
fig,[ax1,ax2] = plt.subplots(nrows=2,ncols=1)
quef = (np.array(range(1,NFFT//2+1))/fs *1000) # the quefrecy axis of the cepstra (in ms)
freqs = np.array(range(1,NFFT//2+1)) * (fs/NFFT)
fn = int(((target_time * fs)-half_frame)/step)
print(f"{ts[fn]}, {T0[fn]/fs*5}, {F0[fn]}, {HNR500[fn]}, {HNR1500[fn]}")
print(f"T0[fn] = {T0[fn]}, {quef[T0[fn]]}")
ax1.plot(freqs[:NFFT//4],N[fn,:NFFT//4],color="orange")
ax1.plot(freqs[:NFFT//4],H[fn,:NFFT//4],color="lightblue")
ax1.plot(freqs[:NFFT//4],S[fn,:NFFT//4],color="black")
ax2.plot(quef[2:NFFT//2],C[fn,2:NFFT//2],color='black')
ax2.plot(quef[2:NFFT//2],C_cl[fn,2:NFFT//2],color='pink')
ax2.axvline(quef[T0[fn]],color='black',alpha=0.2)
return DataFrame({'sec': ts, 'f0':F0, 'hnr_500':HNR500, 'hnr_1500':HNR1500, 'hnr_2500':HNR2500})