Source code for phonlab.artic.egg2oq_

import scipy.signal
import numpy as np
import librosa
import pandas as pd

[docs] def egg_to_oq(x, fs, hop_dur = 0.005, f0_range = [60,400], hp_cut = 70, hp_order = 8, threshold=0.43): """Get glottal open quotient from electroglottography data Extract fundamental frequency of voice (f0) and glottal open quotient (OQ) at equally spaced intervals in a recording of electroglottography (a stereo file). OQ is calcuated with the 'hybrid' method (Herbst, 2020). Preprocessing filtering method follows Rothenberg (2002). Parameters ========== x : ndarray A one-dimensional array of samples in an electroglottograph waveform fs : int the sampling rate of **x** hop_dur : float, default = 0.005 interval in seconds between frames (0.005 seconds = 5 milliseconds) f0_range : list of two integers, default = [63,400] The lowest and highest allowed values of f0. hp_cut : int, default = 70 highpass filter cut frequency (in Hz) (Rothenberg, 2002) hp_order : int, default = 8 highpass filter order (Rothenberg, 2002) threshold : float, default = 0.43 threshold between 0 and 1, on the EGG signal for glottal opening instant (Herbst, 2020) Returns ======= df : DataFrame Open quotient and f0 results in a Pandas dataframe. Note ==== The returned Pandas dataframe has columns: * sec - midpoint times (seconds) of the analysis frames in f0 and OQ * OQ - the glottal open quotient as a function of time. * f0 - estimates of the fundamental frequency of voicing as a function of time References ========== C. T. Herbst (2020) Electroglottography − An Update. *Journal of Voice* , **34** (4), pp. 503-526. M. Rothenberg (2002) Correcting low-frequency phase distortion in electroglottograph waveforms. *Journal of Voice* , **16** (1), 32-36. Example ======= .. code-block:: Python file = "F1_bha24_1.wav" # a stereo file with audio in channel 0 and egg in 1 egg,audio,fs = phon.loadsig(file, chansel=[1,0]) # fool with `chansel` oqdf = phon.egg2oq(egg,fs) # return open quotient data See the example ipython notebook for the code used to generate this figure .. figure:: images/egg2oq.png :scale: 100 % :alt: a figure showing acoustic waveform, EGG signal, and calculated OQ and f0 :align: center Calculate Open Quotient and f0 from ElectroGlottoGraphic data .. """ window_length = (1.0/f0_range[0]) * 1.5 # add 25% for alignment? win = int(window_length*fs) # window for the longest period hop = int(hop_dur*fs) # 5ms hop # highpass filter the egg signal coefs = scipy.signal.butter(hp_order, hp_cut, fs=fs, btype='highpass', output='sos') egg = scipy.signal.sosfiltfilt(coefs, x) degg = np.gradient(egg) # differential of the egg # scale the filtered egg and degg to (0,1) -- for long files do this locally? # using pandas rolling_max egg_s = pd.Series(egg) degg_s = pd.Series(degg) rw = int(1.5*fs) if len(egg) > fs*5: # only do this if the duration is greater than 5 second5 eggmax = egg_s.rolling(rw,min_periods=10).max() # max in a rolling window deggmax = degg_s.rolling(rw,min_periods=10).max() eggmin = egg_s.rolling(rw,min_periods=10).min() # min in a rolling window deggmin = degg_s.rolling(rw,min_periods=10).min() eggmax[eggmax/np.max(egg) < 0.05] = np.max(egg) # require some minimal egg activity else: # no need for local max and min, just take the whole signal eggmax = np.max(egg) eggmin = np.min(egg) deggmax = np.max(degg) deggmin = np.min(degg) egg = (egg - eggmin)/(eggmax-eggmin) # range normalization degg = (degg - deggmin)/(deggmax-deggmin) # get peaks in the degg waveform - the glottal closing instants (gci) # minimum spacing between peaks (distance) is shortest possible period peak_sep = 1.0/f0_range[1] degg_peaks = scipy.signal.find_peaks(degg,distance=peak_sep*fs, height=0.5) glottal = np.zeros(degg.size) glottal[degg_peaks[0]]=1 # closing times (peaks returns indices of peaks) for i in range(egg.size-1): # opening times (threshold method) if egg[i]>threshold and egg[i+1]<threshold: glottal[i] = -1 frames = librosa.util.frame(glottal, frame_length=win, hop_length=hop, axis=0) frame_rate = 1/hop_dur sec = np.array([(i/frame_rate)+(window_length/2) for i in range(frames.shape[0])]) # get times of frames f0 = np.full(frames.shape[0],np.nan) OQ = np.full(frames.shape[0],np.nan) for k in range(frames.shape[0]): gci = np.argwhere(frames[k,:] == 1.0).flatten() # glottal closure instances goi = np.argwhere(frames[k,:] == -1.0).flatten() # glottal opening instances if (gci.size<2): # nothing to look at continue if (goi.size<2): # nothing to look at continue period = gci[1]-gci[0] # interval between closures if goi[0] > gci[0] and goi[0]<gci[1]: # find an appropriate opening instant op = goi[0] elif goi[1] > gci[0] and goi[1]<gci[1]: op = goi[1] else: continue OQ[k] = (gci[1]-op)/period f0[k] = 1/(period/fs) # may as well calculate this while we are here df = pd.DataFrame({'sec':sec,'OQ':OQ,'f0':f0}) return df