Source code for phonlab.acoustic.sgram_

from scipy.signal import spectrogram
from scipy.signal import windows
import numpy as np
import matplotlib.pyplot as plt
from ..utils.prep_audio_ import prep_audio

def compute_sgram(x,fs,w,s=0.001,order=13):
    """Compute a decibel-scaled magnitude spectrogram from an array of audio samples.

    Parameters
    ==========
    x : ndarray
        Array of audio samples.
    fs : integer
        The sampling frequency of the audio samples in `x`.
    w : float
        Length in seconds of the analysis window. For an effective filter
        bandwidth of 300 Hz use w = 0.008, and for an effective filter
        bandwidth of 45 Hz use w = 0.04.
    s : float, default=0.001
        The time (in seconds) between adjacent spectra in the spectrogram.
    order : integer, default = 13
        Determines the number of points in the FFT analysis that produces
        the spectrogram (2**order). If the analysis window (w*fs samples)
        is longer than 2**order, the FFT size is raised to the next power
        of two that can hold the window instead of failing.

    Returns
    =======
    t : ndarray
        Array of segment times.
    f : ndarray
        Array of sample frequencies.
    Sxx : ndarray
        Spectrogram of the audio. The last axis of Sxx corresponds to the
        segment times. Values are 10 * log10(M), where M is the magnitude
        spectrogram returned by scipy.signal.spectrogram (mode='magnitude',
        scaling='spectrum'). Zero magnitudes are floored at the smallest
        positive float so that the result is always finite.

        NOTE(review): 10 * log10 of a magnitude is half the conventional
        20 * log10 decibel value; the scale is kept as-is so existing
        callers see unchanged numbers.
    """
    # set up parameters for signal.spectrogram()
    nperseg = int(w * fs)          # number of samples per waveform window
    noverlap = int((w - s) * fs)   # overlap that skips forward by s between frames

    # scipy rejects nfft < nperseg, so grow the FFT to the next power of two
    # whenever the requested order is too small for the analysis window.
    nfft = int(np.power(2, order))
    if nfft < nperseg:
        nfft = 1 << (nperseg - 1).bit_length()

    window = windows.blackmanharris(nperseg)
    f, ts, Sxx = spectrogram(x, fs=fs, window=window, nperseg=nperseg,
                             noverlap=noverlap, nfft=nfft,
                             scaling='spectrum', mode='magnitude')

    # log10(0) is -inf (and warns); floor exact zeros (e.g. digital silence)
    # at the smallest positive normal float before converting to decibels.
    floor = np.finfo(Sxx.dtype).tiny
    Sxx = 10 * np.log10(np.maximum(Sxx, floor))  # put spectrum on decibel scale

    return (ts, f, Sxx)
def sgram(x,fs, start=0, end=-1, tf=8000, band='wb', preemph = 0.94, font_size = 14, min_prop = 0.55,
          save_name='', slice_time=-1, cmap='Greys', resize=True, ax=None):
    """Make pretty good looking spectrograms

    * This function uses `compute_sgram` (scipy.signal.spectrogram) to
      calculate a magnitude spectrogram on a decibel scale, which is then
      passed to imshow for plotting.

    * It mainly is used to produce nice looking figures with features like
      readable time and frequency axes, scaling so that the time axis is
      6.5 inches per second, up to a maximum figure width of 12 inches.

    * The function also returns arrays that you can use to create your own
      figures.

    * The function uses one of two window lengths - 40 msec for narrow band
      spectrograms, or 8 msec for wideband spectrograms.

    * One option is to add a "spectral slice" to the display - the
      amplitude/frequency spectrum at a particular point in time.

    Parameters
    ==========
    x : ndarray
        a one-dimensional array of audio samples.
    fs : numeric
        The sampling rate of the audio in **x**
    start : float, default = 0
        starting time (in seconds) of the waveform chunk to plot -- default
        plot whole file. Negative values are treated as 0, and a start that
        falls after the end is reset to 0.
    end : float, default = -1
        ending time (in seconds) of the waveform chunk to plot (-1 means go
        to the end)
    tf : integer, default = 8000
        the top frequency (in Hz) to show in the spectrogram
    band : string, {'wb','nb'}
        effective filter bandwidth of the analysis filter ('wb' = 300 Hz,
        'nb' = 45 Hz). Any value other than 'nb' selects wide band.
    preemph : float, default = 0.94
        add high frequency preemphasis before making the spectrogram, a
        value between 0 and 1
    font_size : integer, default = 14
        the font size to use for the axis labels and tick labels.
    min_prop : float, default = 0.55
        set the 'floor' of the gray scale. The default value specifies that
        the floor will be at 55% of the range between min and max amplitudes.
    save_name : str or Path, default = ''
        name of a file to save the figure with pyplot.savefig(), by default
        no file is saved.
    slice_time : float, default = -1
        location (in seconds) of an optional spectral slice. The slice panel
        is only drawn when this function creates its own figure; when `ax`
        is supplied only the dashed marker line is added to the spectrogram.
    cmap : string, default = "Greys"
        name of a matplotlib colormap for the spectrogram
    resize : boolean, default = True
        if a matplotlib axes object is passed in (the `ax` parameter), resize
        the current figure to classical spectrogram dimensions.
    ax : a matplotlib axes object, default = None
        the user may provide a matplotlib object to which the spectrogram
        will be plotted.

    Returns
    =======
    ax : a matplotlib axes object
        The plot axes is returned
    f : ndarray
        Array of sample frequencies.
    t : ndarray
        Array of segment times (shifted by the start time of the chunk).
    Sxx : ndarray
        Spectrogram of the audio as returned by `compute_sgram`: the last
        axis corresponds to the segment times and the values are
        10 * log10 of the magnitude spectrogram computed by
        scipy.signal.spectrogram.

    Examples
    ========
    Plot a spectrogram of a portion of the sound file from 1.5 to 2 seconds.
    Then add a vertical red line at time 1.71

    .. code-block:: Python

        import matplotlib.pyplot as plt

        audio_dir = importlib.resources.files('phonlab') / 'data' / 'example_audio'
        example_file = audio_dir / 'sf3_cln.wav'

        x,fs = phon.loadsig(example_file,chansel=[0])
        phon.sgram(x,fs,start=1.5, end=2.0)
        plt.axvline(1.71,color="red")

    .. figure:: images/burst.png
        :scale: 50 %
        :alt: a spectrogram with a red line marking the location of the burst
        :align: center

        Marking the burst found by `phon.burst()`

    Read a file into an array `x`, track the formant frequencies in the file,
    use them to produce sine wave speech, and then plot a spectrogram of the
    resulting signal.

    .. code-block:: Python

        example_file = importlib.resources.files('phonlab') / 'data' / 'example_audio' / 'sf3_cln.wav'

        x,fs = phon.loadsig(example_file, chansel=[0])
        fmtsdf = phon.track_formants(x,fs)    # track the formants
        x2,fs2 = phon.sine_synth(fmtsdf)    # use the formants to produce sinewave synthesis
        ax1,f,t,Sxx = phon.sgram(x2,fs2, band="nb", preemph=0)  # plot a spectrogram of it

    .. figure:: images/sine_synth.png
        :scale: 40 %
        :alt: a spectrogram of sine-wave synthesis
        :align: center

        Showing the spectrogram of sine-wave synthesis.
    """
    target_fs = tf*2   # top frequency is the Nyquist frequency for the analysis

    # analysis window size (sec): 40 ms for narrow band, 8 ms for wide band
    w = 0.04 if band=='nb' else 0.008

    # set up parameters for the spectrogram window
    figheight = 4.5        # height in inches
    max_figwidth = 12      # maximum figure width in inches
    inches_per_sec = 6.5   # desired width scaling of printed spectrogram
    slice_width = 1.5      # how much space to give to the spectral slice

    cmap = plt.get_cmap(cmap)

    # ----------- condition waveform -----------------------
    # NOTE(review): prep_audio is defined elsewhere; presumably it resamples
    # to target_fs and applies preemphasis, returning the new rate -- confirm.
    x2, fs = prep_audio(x,fs, target_fs = target_fs, pre = preemph, quiet=True)

    offset = max(start, 0)     # a negative start would otherwise slice from the end
    i1 = int(offset * fs)      # index of starting time: seconds to samples
    i2 = int(end * fs)         # index of ending time
    if i2<0 or i2>len(x2):     # stop at the end of the waveform
        i2 = len(x2)
    if i1>i2:                  # don't let start follow end
        i1 = 0
        offset = 0             # keep the time axis in step with the actual slice

    # ----------- compute the spectrogram ---------------------------------
    ts,f,Sxx = compute_sgram(x2[i1:i2],fs,w)

    # ------------ display in a matplotlib figure --------------------
    ts = np.add(ts,offset)     # increment the spectrogram times by the start value
    t_min, t_max = ts.min(), ts.max()
    dur = t_max - t_min + w    # scale figure size
    figwidth = min(dur * inches_per_sec, max_figwidth)

    ax2 = None                 # only exists when this function builds the slice panel
    if ax is None:
        if slice_time>0:       # if spectral slice is desired, add an axes for it
            fig = plt.figure(figsize=(figwidth+slice_width, figheight),dpi=72)
            gs = fig.add_gridspec(nrows=1, ncols=2, width_ratios=[figwidth/slice_width, 1])
            ax1 = fig.add_subplot(gs[0])
            ax2 = fig.add_subplot(gs[1])
        else:
            fig = plt.figure(figsize=(figwidth, figheight),dpi=72)
            ax1 = fig.add_subplot(111)
    else:
        if resize:
            fig = plt.gcf()    # get the current figure
            fig.set_size_inches(figwidth, figheight)  # resize it by the values here
        ax1 = ax

    # Floor of the color scale, computed from finite values only so that a
    # stray -inf/nan cannot turn vmin into nan.
    finite = Sxx[np.isfinite(Sxx)]
    if finite.size:
        lo, hi = finite.min(), finite.max()
        vmin = lo + (hi - lo)*min_prop
    else:
        vmin = None

    extent = (t_min, t_max, f.min(), f.max())  # time and frequency values for indices
    ax1.imshow(Sxx, aspect='auto', interpolation='nearest', cmap=cmap,
               vmin = vmin, extent = extent, origin='lower')
    ax1.grid(which='major', axis='y', linestyle='-')  # add grid lines
    ax1.set_xlabel("Time (sec)", size=font_size)
    ax1.set_ylabel("Frequency (Hz)", size=font_size)
    ax1.tick_params(labelsize=font_size)
    ax1.locator_params(axis='y', prune="upper")  # for stacking sgram with other axes
    plt.subplots_adjust(left=0, bottom=0, right=1, top=1, wspace=0, hspace=0)

    if slice_time > 0:         # if spectral slice is desired, plot the spectrum
        ax1.axvline(x=slice_time,color='black',linestyle="--")
        if ax2 is not None:    # no slice panel exists when the caller supplied `ax`
            i = np.argmin(np.abs(ts-slice_time))  # find the index of the spectral slice
            spectrum = Sxx[:, i]
            ax2.plot(spectrum,f,color='black')
            ax2.grid(which='major', axis='y', linestyle=':')  # add grid lines
            ax2.set_ymargin(0)  # put y-axis at bottom and top of axis (as in spectrogram)
            ax2.tick_params(labelleft=False,labelsize=font_size)  # do not write the frequency axis labels

    # save_name may be a str or a Path; Path objects have no len()
    if save_name is not None and str(save_name) != '':
        print(f'Saving file: {save_name}')
        plt.savefig(save_name,dpi=300,bbox_inches='tight')

    return (ax1, f,ts,Sxx)