Source code for phonlab.acoustic.fric_meas

import nitime.algorithms as tsa  # has the multitaper routine
import numpy as np
from scipy.signal import find_peaks
from collections import namedtuple

def dB(x, out=None):
    if out is None:
        return 10 * np.log10(x)
    else:
        np.log10(x, out)
        np.multiply(out, 10, out)

[docs] def hz2bark(hz): """ Convert frequency in Hz to Bark using the Schroeder 1977 formula:: bark(hz) = 7 * arcsinh(hz/650) Parameters ========== hz : scalar or array Frequency in Hz. Returns ======= bark : scalar or array Frequency in Bark, in an array the same size as `hz`, or a scalar if `hz` is a scalar. """ return 7 * np.arcsinh(hz/650)
[docs] def bark2hz(bark): ''' Convert frequency in Hz to Bark using the Schroeder 1977 formula:: Hz(b) = 650 * sinh(b/7) Parameters ---------- bark: scalar or array Frequency in Bark. Returns ------- scalar or array Frequency in Hz, in an array the same size as `bark`, or a scalar if `bark` is a scalar. ''' return 650 * np.sinh(bark/7)
FricMeas = namedtuple('FricMeas', [ 'Fm', 'Am', 'AmpD', 'Fsec', 'Asec', 'mode', 'COG', 'SD', 'Skew', 'Kurtosis', 'spec', 'freq' ])
[docs] def fricative(x,fs,t): """ Measure fricative acoustic values, from a 20 ms window, centered on time `t`. In addition to reporting the spectral moments (COG, SD, skew, and kurtosis) of the multitaper spectrum (from `nitime.algorithms.multi_taper_psd()`), `fricative()` finds the "main peak" in the fricative spectrum. For fricatives with a resonant cavity, this is the lowest resonance of the cavity. For those with no resonance the main peak frequency may be more a property of the source function. The main peak heuristic is to start by looking the in the spectrum above 300 Hz, for the lowest frequency peak that is at least 50% of the amplitude range, separated from the nearest peak by at least 500Hz, and prominent by 8bB above the next nearest peak (see the **scipy.signal.find_peaks()** documentation). If no peak is found, relax the prominence constraint, then the amplitude constraint, and then both. Parameters ========== x : ndarray a one-dimensional array of audio samples fs : int the sampling frequency of `x` (ideally should be at least 16000) t : float the time (in seconds) at which to take measurements (this is usually in a fricative, but doesn't have to be). Returns ======= namedtuple : A `FricMeas` namedtuple of fricative measures is returned, with the following fields: Fm : float Frequency (in Hz) of the first main spectral peak. A measure correlated with the length of the front tube. Am : float Amplitude (in dB) at Fm AmpD : float The difference in amplitude (dB) between the higher of Am or Asec and the minimum amplitude between 500Hz and Fm. A measure of sibilance. Fsec : float Frequency of the second major peak in the spectrum. If the front tube is long there can be a second resonance. If there is no second major peak, this field's value is `None` Asec: float Amplitude (in dB) at Fsec. If there is no second major peak, this field's value is `None` mode : string a report on the peak finding parameters used COG : float center of gravity, the first moment of the spectrum. SD : float standard deviation, the second moment of the spectrum. Skew : float scaled third moment, skew Kurtosis : float scaled fourth moment, kurtosis spec : ndarray the multi-taper power spectrum at the midpoint (e.g. for use in plotting spectra) freq : ndarray the frequency scale of the spectrum (e.g. for use in plotting spectra) Note ==== The major peaks analysis implemented here draws on ideas from Shadle (2023) and Shadle et al. (2023). Moments analysis was introduced for analysis of stop release burst spectra by Forrest et al. (1988). References ========== K. Forrest, G. Weismer, P. Milenkovic, and R.N. Dougall (1988) Statistical analysis of word-initial voiceless obstruents: Preliminary data. `J. Acoust. Soc. Am.` 84(1), 115–123. C. Shadle (2023) Alternatives to moments for characterizing fricatives: Reconsidering Forrest et al. (1988). `J. Acoust. Soc. Am.` 153 (2): 1412–1426. https://doi.org/10.1121/10.0017231 C. Shadle, W-R. Chen, L.L. Koenig, J.L. Preston (2023) Refining and extending measures for fricative spectra, with special attention to the high-frequency range. `J. Acoust. Soc. Am.` 154 (3): 1932–1944. https://doi-org.libproxy.berkeley.edu/10.1121/10.0021075 Example ======= This example returns fricative measurements from time 2.25 seconds in the audio samples returned by `loadsig()`. The major peak and COG frequencies are indicated in a plot of the spectrum. .. code-block:: Python x, fs = phon.loadsig("sf3_cln.wav") y, fs = phon.prep_audio(x, fs, target_fs=16000) fricm = phon.fricative(y, fs, 2.25) print(f"first major peak at {fricm.Fm:.1f}, Center of Gravity is {fricm.COG:.1f}") plt.plot(fricm.freq, fricm.spec) plt.axvline(fricm.Fm, color="red") plt.axvline(fricm.COG, color="green") The figure here shows major peaks and COG in several different fricatives. .. figure:: images/fricative.png :scale: 50 % :alt: Marking major peaks and COG in fricative spectra. :align: center Marking major peaks (red lines) and COG (green line) in fricative spectra. """ winsize = 0.02 # 20 ms window centered at midpoint (mp) i_center = int(t * fs) # index of midpoint time: seconds to samples i_start = int(i_center - winsize/2*fs) # back 10 ms i_end = int(i_center + winsize/2*fs) # forward 10 ms f, psd, nu = tsa.multi_taper_psd(x[i_start:i_end]) # get multi-taper spectrum spec = dB(psd) # work with magnitude spectrum freq = (f/(2*np.pi))*fs # frequency axis for the spectrum nyquist = fs/2 fspace = nyquist/len(f) # frequency spacing in spectrum - map from frequency to array index bottom_freq = int(300/fspace) # bottom of the search space (in points in the array) top_freq = int(16000/fspace) # set frequency range for analysis -- 11kHz by default, if (nyquist < top_freq): # but if sampling rate is less than 22kHz, the max top_freq = nyquist # is reduced to the Nyquist freq (1/2 the sampling rate) spec_chunk = spec[bottom_freq:top_freq] min_dist = int(500/fspace) # for peak picking - peaks must be at least this many points apart mode = '500/50%/8dB' height = 0.5 # proportion of amplitude range in fricative band min_height = np.min(spec_chunk) + (np.max(spec_chunk)-np.min(spec_chunk))*height min_prom = 8 # dB (peaks,prop) = find_peaks(spec_chunk, height=min_height, distance=min_dist, prominence=min_prom) if (len(peaks)<1): mode = '500/50%/4dB' min_prom = 4 # dB (peaks,prop) = find_peaks(spec_chunk, height=min_height, distance=min_dist, prominence=min_prom) if (len(peaks)<1): mode = '500/33%/3dB' height = 0.33 min_height = np.min(spec_chunk) + (np.max(spec_chunk)-np.min(spec_chunk))*height min_prom = 3 # dB (peaks,prop) = find_peaks(spec_chunk, height=min_height, distance=min_dist, prominence=min_prom) if (len(peaks)<1): mode = 'no peak found' peaks = np.append(peaks,[0]) index_of_main_peak = bottom_freq + peaks[0] # this is selecting the first peak in an array of peaks Fm = freq[index_of_main_peak] # convert to Hz Am = spec[index_of_main_peak] # get amplitude AmpD = Am - np.min(spec[bottom_freq:index_of_main_peak]) Fsec = np.nan Asec = np.nan if len(peaks)>1: index_of_second_peak = bottom_freq + peaks[1] Fsec = freq[index_of_second_peak] Asec = spec[index_of_second_peak] if Asec>Am: AmpD = Asec - np.min(spec[bottom_freq:index_of_main_peak]) # -------- moments analysis ----------------------- bottom_freq = int(300/fspace) top_freq = int(11000/fspace) # set frequency range for analysis -- 11kHz by default, if (nyquist < top_freq): # but if sampling rate is less than 22kHz, the max top_freq = nyquist # is reduced to the Nyquist freq (1/2 the sampling rate) f = freq[bottom_freq:top_freq] temp = spec[bottom_freq:top_freq] - np.min(spec[bottom_freq:top_freq]) # make sure none are negative Ps = temp/np.sum(temp) # scale to sum to 1 COG = np.sum(Ps*f) # center dev = f-COG # deviations from COG Var = np.sum(Ps * dev**2) # second moment SD = np.sqrt(Var) # Standard deviation Skew = np.sum(Ps * dev**3) # third moment Kurtosis = np.sum(Ps * dev**4) # fourth moment # scaling recommended by Forrest et al. 1990 Skew = Skew/np.sqrt(Var**3) Kurtosis = Kurtosis/(Var**2) - 3 return FricMeas( Fm=Fm, Am=Am, AmpD=AmpD, Fsec=Fsec, Asec=Asec, mode=mode, COG=COG, SD=SD, Skew=Skew, Kurtosis=Kurtosis, spec=spec, freq=freq )