import scipy.signal
import scipy.io.wavfile
import numpy as np
import matplotlib.pyplot as plt
from librosa.util import frame
from librosa import frames_to_time
from ..utils.prep_audio_ import prep_audio
def rem_dc(x):
'''Remove the DC component of a signal.'''
return x - np.mean(x)
def bandpass(x, cutoffs, fs, order=3):
'''Apply a butterworth bandpass filter.'''
nyq = 0.5 * fs
coefs = scipy.signal.butter(order, np.array(cutoffs) / nyq, btype='band',output='sos')
return scipy.signal.sosfiltfilt(coefs, x)
def lowpass(x, cutoff, fs, order=3):
'''Apply a butterworth lowpass filter.'''
nyq = 0.5 * fs
coefs = scipy.signal.butter(order, cutoff / nyq, btype='low',output='sos')
return scipy.signal.sosfiltfilt(coefs, x)
def centerpass(x, center, bw, fs):
'''Apply a 'center-pass' filter - a peak at center frequency with a bandwidth bw.'''
nyq = 0.5 * fs
w0 = center/nyq
Q = w0 / (bw/nyq)
coefs = scipy.signal.iirpeak(w0, Q,output='sos')
return scipy.signal.sosfiltfilt(coefs, x)
# this function takes a chunk of waveform and returns a frequency spectrum
# sec is the size of the chunk in seconds
[docs]
def get_rhythm_spectrum(x,fs):
"""Return a rhythm spectrum - ala Tilsen & Johnson, 2008
This approach to measuring rythmicity was described by Tilsen and Johnson (2008). The code here and in `rhythmogram` is a translation and extension of Tilsen's Matlab code. This function takes a chunk of waveform (best if it is 4 seconds long) and computes a spectrum of the bandpass filtered amplitude envelope. It finds periodicity in the amplitude envelope and returns a spectrum in the range from **f** to 7 Hz, tracking components in the amplitude envelope that repeat at low frequency. The lowest frequency represented **f** is determined from the duration of the waveform in **x**, which should be at least 4 seconds for accurate detection of slow rhythmic components.s
Parameters
==========
x : ndarray
A one dimensional array of audio samples -
fs : int
The sampling frequency of x
Returns
=======
freq : ndarray
The frequency axis of the rhythm spectrum
psd : ndarray
The amplitude values of the rhythm spectrum
References
==========
S. Tilsen & K. Johnson (2008) Low-frequency Fourier analysis of speech rhythm. `Journal of the Acoustical Society of America`, **124** (2), EL34-EL39.
Example
=======
.. code-block:: Python
x,fs = phon.loadsig("s09003.wav")
slice = x[fs*3:fs*7]
f,Sx = phon.get_rhythm_spectrum(slice,fs)
plt.plot(f,Sx)
.. figure:: images/get_rhythm_spectrum.png
:scale: 40 %
:alt: the spectrum of the amplitude envelope of a 4 second chunk of audio
:align: center
The spectrum of the amplitude envelope of a 4 second chunk of audio. This section of speech in Spanish has an oscillation of about 3 Hz; regular repetition of amplitude peaks at intervals of about 330 msec.
"""
ds_rate = 100
ds_factor = fs//ds_rate
npoints = 512
chunk_size = len(x)/fs
# Amplitude Envelope.
x = bandpass(x, cutoffs=[100, 1500], fs=fs, order=2)
x = lowpass(np.fabs(x), cutoff=10, fs=fs, order=6)
# Downsample and remove DC component.
y = rem_dc(scipy.signal.decimate(x, ds_factor, ftype = 'fir', zero_phase=True))
y = y * scipy.signal.windows.tukey(len(y), 0.1) # Shape downsampled signal with a Tukey window.
norm_frame = y / np.sqrt(np.var(y)) # Normalize to unit variance.
freq, powsd = scipy.signal.periodogram(norm_frame, # find frequency spectrum of the amplitude envelope
fs=ds_rate, nfft=npoints, scaling="spectrum")
min_freq = 1/(chunk_size*(1/2))
i = np.where((freq < 7) & (freq > min_freq)) # pick indices for spectrum less than lowpass Hz
return (freq[i],powsd[i])
[docs]
def rhythmogram(x,fs):
"""Return a rhythm spectrogram - ala Tilsen & Johnson, 2008
The approach to measuring rythmicity that is implemented by this function was described by Tilsen and Johnson (2008). The code here and in `get_rhythm_spectrum` is a translation and extension of Tilsen's Matlab code. This function takes a buffer of audio and computes spectra of the amplitude envelope of the bandpass [100 - 1500Hz] filtered signal. Spectra are computed twice a second (i.e. the step size is 0.5 seconds) over windows that are 4 seconds long. It finds periodicity in the amplitude envelope and returns a spectrogram with a frequency range from 1 to 7 Hz, tracking components that repeat at intervals of once per second down to a repetition rate of 140 ms.
Parameters
----------
x : ndarray
A 1-D array of audio samples
fs : int
the sampling frequency of the audio in **x** (in Hz)
Returns
-------
t : ndarray, one-dimensional
the time axis of the rhythmogram, in seconds
f : ndarray, one-dimensional
the frequency axis of the rhythmogram, in Hz
Sxx : ndarray, two-dimensional
the amplidude values of the rhythmogram, axis 1 is time, axis 0 is frequency
References
----------
S. Tilsen & K. Johnson (2008) Low-frequency Fourier analysis of speech rhythm. `Journal of the Acoustical Society of America`, **124** (2), EL34-EL39.
Example
-------
.. code-block:: Python
x,fs = phon.loadsig("s1202a.wav", chansel=[0])
ts,f,Sxx = phon.rhythmogram(x,fs) # calculate rhythm spectra over time
m = np.mean(Sxx,axis=0) # the mean spectrum of the file
sd = np.std(Sxx,axis=0) # the standard deviation of the spectrum
Sxx_thresh = Sxx - (m + 0.5*sd) # subtract a threshold to find "rhythmic" sections
start = 136 # seconds, show the thresholded spectrogram for a 30 second interval
end = 166
s = np.int32((start-2) *2) # start frame, two frames per second
e = np.int32((end-2) *2) # end frame
extent = (start,end,min(f),max(f)) # get the time and frequency values for figure.
plt.imshow(Sxx_thresh.T[:,s:e], aspect='auto', extent = extent,
origin='lower',cmap="coolwarm",interpolation="spline36")
plt.set(xlabel="Time (sec)", ylabel="Frequency (Hz)")
.. figure:: images/rhythmogram.png
:scale: 40 %
:alt: a time/frequency plot of low frequency energy in a 30 second long chunk of speech
:align: center
A time/frequency plot of low frequency energy in a 30 second long chunk of speech
"""
# Amplitude Envelope.
y = bandpass(x, cutoffs=[100, 1500], fs=fs, order=2)
y = lowpass(np.fabs(y), cutoff=10, fs=fs, order=6)
# Downsample and remove DC component.
fs_env = 100 # desired sampling frequency of the envelope
ds_factor = fs//fs_env # new rate will be 100 samples per second
env = rem_dc(scipy.signal.decimate(y, ds_factor, ftype = 'fir', zero_phase=True))
# divide into frames, window them
flen_sec = 4
hop_sec = 0.5
frame_length = np.int32(flen_sec * fs_env)
step = np.int32(hop_sec * fs_env)
frames = frame(env, frame_length=frame_length, hop_length=step,axis=0)
t = frames_to_time(range(len(frames)), sr=fs_env, hop_length=step,n_fft = frame_length)
w = scipy.signal.windows.tukey(frame_length,0.1)
frames = np.multiply(frames,w) # apply a Tukey window to each frame
norm_frames = frames.T / np.sqrt(np.var(frames,axis=1)) # normalize the amplitudes in the frames
freq, psd = scipy.signal.periodogram(norm_frames.T,fs=fs_env, nfft=512, scaling="spectrum") # spectrum
min_freq = 1/(flen_sec*(1/2))
i = np.where((freq < 7) & (freq > min_freq)) # pick indices for spectrum less than lowpass Hz
f = freq[i]
Sxx = np.squeeze(psd[:,i])
return t, f, Sxx