Source code for phonlab.acoustic.get_HNR

from ..utils.prep_audio_ import prep_audio

import numpy as np
from librosa import util
from numpy.fft import rfft, irfft
from pandas import DataFrame
import matplotlib.pyplot as plt


[docs]
def HNR(x,fs, f0_range = [64,400], l= 0.06, s=0.005, target_time = None):
    '''Compute the Harmonics-to-Noise Ratio using the Cepstrum-Based method given by de Krom (1993).
The matlab implementation by Yen-Liang Shue (2009) which is a part of the VoiceSauce collection of 
software provided some valuable pointers in how to implement de Krom's method.  One difference between
the Shue implementation and this one is that here we use a fixed window size (specified with the input 
parameter **l** and by default 60 ms) where Shue's algorithm required that F0 estimates be provided as 
input and then HNR is calculated over windows of variable length, always long enough for 5 pitch periods
according to the estimated F0 for that location in the audio.

de Krom's (1993) method uses cepstral analysis to filter out the harmonic component of the spectrum, 
allowing the separate calculation of harmonic and non-harmonic energy. 

Parameters
==========
    x : ndarray
        A one-dimensional array of audio samples
    fs : int
        Sampling rate of **x**
    f0_range : an array of two numbers, default=[64,400]
        The expected range for f0.
    l : float, default = 0.06
        Length of analysis windows.  The default is 60 milliseconds.
    s : float, default = 0.005
        Step size, of hops between analysis windows. The default is 5 milliseconds.
    target_time : float, default = None
        If a time value (in seconds) is given, diagnostic matplotlib plots of the spectrum and cepstrum 
        will be produced.
    
Returns
=======
    df: pandas DataFrame  
            measurements at (by default) 5 msec intervals.

Note
====
The columns in the returned dataframe are for each frame of audio:
    * sec - time at the midpoint of each frame in seconds
    * f0 - estimate of the fundamental frequency in Hz
    * hnr_500 - The harmonics-to-noise ratio in the spectrum below 500Hz
    * hnr_1500 - The harmonics-to-noise ratio in the spectrum below 1500Hz
    * hnr_2500 - The harmonics-to-noise ratio in the spectrum below 2500Hz


References
==========
    G. de Krom (1993) A Cepstrum-Based Technique for Determining a Harmonics-to-Noise Ratio in Speech Signals. `Jounral of Speech and Hearing Research` 36,254-266.


Example
=======

.. code-block:: Python

    example_file = importlib.resources.files('phonlab') / 'data/example_audio/sf3_cln.wav'
    x,fs = phon.loadsig(example_file,chansel=[0])

    df = HNR(x,fs, target_time=2.1)  # get diagnostic plots for the spectrum at time 2.1 seconds.


This example shows diagnostic plots of the spectrum (top panel) and cepstrum (bottom panel) of a frame of audio
at time 2.1 seconds in the file 'sf3_cln.wav'. In the top panel we see the log magnitude spectrum in black, the noise component of the spectrum in orange, and the harmonic component in light blue.  In the bottom panel we see the cepstrum of the log magnitude spectrum.  The "Rahmonics" in black are removed (set to zero) giving the "liftered" cepstrum (in pink).  The FFT of this liftered cepstrum gives the noise spectrum (well, after some level correction).

.. figure:: images/HNR.png
    :scale: 50 %
    :alt: Diagnostic plots of the spectrum (top panel) and cepstrum (bottom panel) of a frame of audio.
    :align: center  


    '''
    x,fs = prep_audio(x,fs,target_fs=16000,quiet=True)
    
    frame_length = int(l*fs)
    step = int(s*fs)
    half_frame = frame_length//2
    frame_length = half_frame * 2 + 1    # odd number in frame
    #NFFT = int(2**(np.ceil(np.log(frame_length)/np.log(2)))*2)
    NFFT = frame_length
    
    frames = util.frame(x,frame_length=frame_length, hop_length=step,axis=0)

    nb = frames.shape[0]  # number of frames
    ts = (np.array(range(nb)) * step + half_frame)/fs  # time axis for output
    w = np.blackman(frame_length)
    S = 10 * np.log10(np.abs(rfft(w*frames,NFFT)))
    C = np.real(irfft(S,NFFT))  # spectrum of the spectrum = cepstrum

    if f0_range[0]<f0_range[1]:
        f0_range = np.flip(f0_range)  # from [60,400] -> [400,60]
    T0_range = np.round(fs/np.array(f0_range)).astype(np.int32)
    T0 = np.argmax(C[:,T0_range[0]:T0_range[1]],axis=-1) + T0_range[0]  # F0 period
    F0 = fs/T0
    
    half_combwidth = 5
    C_cl = C.copy()
    maxindex = C_cl.shape[1]-1
    for j in range(nb):     # lifter out the harmonics
        for k in range(1,4):
            il = T0[j]*k - (half_combwidth+1)
            ir = T0[j]*k + half_combwidth
            if ir>maxindex: break
            C_cl[j,il:ir] = 0

    # -------- get noise spectrum from liftered cepstrum
    Nap = np.real(rfft(C_cl[:,:NFFT//2], NFFT)) 
    Ha = S - Nap          # estimate Harmonic spectrum

    # ------ Baseline correction ---------
    N = Nap
    for j in range(nb):
        Hdelta = F0[j]/fs * NFFT
        for f in np.arange(Hdelta+0.0001,NFFT//2-1,Hdelta):
            fend = round(f)
            fstart = np.ceil(f-Hdelta).astype(np.int32)
            N[j,fstart:fend] += np.min(Ha[j,fstart:fend])
    H = S - N  # harmonic spectrogram
    N = S - H  # noise spectrogram
    
    i500 = int(500/fs * NFFT)  # index at 500 Hz
    i1500 = int(1500/fs * NFFT)
    i2500 = int(2500/fs * NFFT)

    HNR500 = np.mean(H[:,:i500],axis=-1) - np.mean(N[:,:i500],axis=-1)
    HNR1500 = np.mean(H[:,:i1500],axis=-1) - np.mean(N[:,:i1500],axis=-1)
    HNR2500 = np.mean(H[:,:i2500],axis=-1) - np.mean(N[:,:i2500],axis=-1)


    # ------------- diagnostic plot ---------------
    if target_time != None:
        fig,[ax1,ax2] = plt.subplots(nrows=2,ncols=1)
        quef = (np.array(range(1,NFFT//2+1))/fs *1000)  # the quefrecy axis of the cepstra (in ms)
        freqs = np.array(range(1,NFFT//2+1)) * (fs/NFFT)

        fn = int(((target_time * fs)-half_frame)/step)
        print(f"{ts[fn]}, {T0[fn]/fs*5}, {F0[fn]}, {HNR500[fn]}, {HNR1500[fn]}")
        print(f"T0[fn] = {T0[fn]}, {quef[T0[fn]]}")
        ax1.plot(freqs[:NFFT//4],N[fn,:NFFT//4],color="orange")
        ax1.plot(freqs[:NFFT//4],H[fn,:NFFT//4],color="lightblue")
        ax1.plot(freqs[:NFFT//4],S[fn,:NFFT//4],color="black")

        ax2.plot(quef[2:NFFT//2],C[fn,2:NFFT//2],color='black')
        ax2.plot(quef[2:NFFT//2],C_cl[fn,2:NFFT//2],color='pink')
        ax2.axvline(quef[T0[fn]],color='black',alpha=0.2)

    return DataFrame({'sec': ts, 'f0':F0, 'hnr_500':HNR500, 'hnr_1500':HNR1500, 'hnr_2500':HNR2500})