Source code for phonlab.acoustic.sgram_

from scipy.signal import spectrogram
from scipy.signal import windows
import numpy as np
import matplotlib.pyplot as plt
from ..utils.prep_audio_ import prep_audio


[docs]
def compute_sgram(x,fs,w,s=0.001,order=13):
    """Compute a decibel-scaled magnitude spectrogram from an array of samples.

    Each frame is weighted with a Blackman-Harris window of `w` seconds and
    analysed with an FFT of ``2**order`` points by `scipy.signal.spectrogram`.

    Parameters
    ==========
    x : ndarray
        array of audio samples
    fs : integer
        The sampling frequency of the audio samples in `x`
    w : float
        Length in seconds of the analysis window.  For an effective filter bandwidth of 300 Hz use w = 0.008, and for an effective filter bandwidth of 45 Hz use w = 0.04.
    s : float, default=0.001
        The time (in seconds) between adjacent spectra in the spectrogram.
    order : integer, default = 13
        This parameter determines the number of points in the FFT analysis that produces the spectrogram.  The number of points will be a power of 2 (2**order) and should be larger than the number of points in the analysis window (which is w*fs).

    Returns
    =======
    t : ndarray
        Array of segment times.
    f : ndarray
        Array of sample frequencies.
    Sxx : ndarray
        Spectrogram of the audio. The last axis of Sxx corresponds to the segment times.
        It is the magnitude spectrum on the decibel scale, i.e. 20 * log10(Sxx) of the
        magnitude spectrogram returned by scipy.signal.spectrogram.  Magnitudes of exactly
        zero (e.g. digital silence) are clamped to the smallest positive float before
        taking the logarithm, so every returned value is finite.

    """
    # set up parameters for signal.spectrogram()
    # NOTE(review): int() truncates, so floating-point error in w*fs or
    # (w-s)*fs can shift these counts by one sample -- kept as in the
    # original so frame counts do not change.
    nperseg = int(w*fs)         # number of samples per waveform window
    noverlap = int((w-s)*fs)    # overlap chosen so frames advance by about s seconds
    nfft = np.power(2,order)    # number of points in the fft
    window = windows.blackmanharris(nperseg)

    f,ts,Sxx = spectrogram(x,fs=fs,noverlap = noverlap, window=window, nperseg = nperseg,
                              nfft = nfft, scaling='spectrum', mode = 'magnitude')

    # mode='magnitude' yields amplitudes, so decibels are 20*log10 (10*log10
    # is only correct for power).  Clamp zeros first: log10(0) is -inf, which
    # would poison any min/max computed on the result.
    floor = np.finfo(Sxx.dtype).tiny
    Sxx = 20 * np.log10(np.maximum(Sxx, floor))

    return (ts, f, Sxx)

    


[docs]
def sgram(x,fs, start=0, end=-1, tf=8000, band='wb', preemph = 0.94, font_size = 14,
    min_prop = 0.55, save_name='', slice_time=-1, cmap='Greys', resize=True, ax=None):
    """Make pretty good looking spectrograms

    * This function calls `compute_sgram` (which uses scipy.signal.spectrogram) to calculate a decibel-scaled magnitude spectrogram, and passes it to `imshow` for plotting.

    * It mainly is used to produce nice looking figures with features like readable time and frequency axes, and scaling so that the time axis is 6.5 inches per second, up to a maximum figure width of 12 inches.

    * The function also returns arrays that you can use to create your own figures.

    * The function uses one of two window lengths - 40 msec for narrow band spectrograms, or 8 msec for wideband spectrograms.

    * One option is to add a "spectral slice" to the display - the amplitude/frequency spectrum at a particular point in time.

    Parameters
    ==========
    x : ndarray
        a one-dimensional array of audio samples.
    fs : numeric
        The sampling rate of the audio in **x**
    start : float, default = 0
        starting time (in seconds) of the waveform chunk to plot -- default plot whole file.
        A negative value, or a value that falls after `end`, is treated as 0.
    end : float, default = -1
        ending time (in seconds) of the waveform chunk to plot (-1 means go to the end)
    tf : integer, default = 8000
        the top frequency (in Hz) to show in the spectrogram
    band : string, {'wb','nb'}
        effective filter bandwidth of the analysis filter ('wb' = 300 Hz, 'nb' = 45 Hz).
        Any value other than 'nb' selects the wide band analysis.
    preemph : float, default = 0.94
        add high frequency preemphasis before making the spectrogram, a value between 0 and 1
    font_size : integer, default = 14
        the font size to use for the axis labels and tick labels.
    min_prop : float, default = 0.55
        set the 'floor' of the gray scale.  The default value specifies that the floor will be
        at 55% of the range between min and max amplitudes.
    save_name : str or Path, default = ''
        name of a file to save the figure with `savefig()`, by default no file is saved.
    slice_time : float, default = -1
        location (in seconds) of an optional spectral slice; values <= 0 disable it.
        The slice panel is only created when this function makes its own figure.  When
        an `ax` is supplied there is no second axes to draw into, so only the dashed
        marker line is added to the spectrogram.
    cmap : string, default = "Greys"
        name of a matplotlib colormap for the spectrogram
    resize : boolean, default = True
        if a matplotlib axes object is passed in (the `ax` parameter), resize its figure to
        classical spectrogram dimensions.
    ax : a matplotlib axes object, default = None
        the user may provide a matplotlib object to which the spectrogram will be plotted.

    Returns
    =======
    ax : a matplotlib axes object
        The plot axes is returned
    f : ndarray
        Array of sample frequencies.
    t : ndarray
        Array of segment times (shifted by `start`).
    Sxx : ndarray
        Spectrogram of the audio, exactly as returned by `compute_sgram` (magnitude
        spectrum on a decibel scale).  The last axis of Sxx corresponds to the segment times.

    Examples
    ========

    Plot a spectrogram of a portion of the sound file from 1.5 to 2 seconds.
    Then add a vertical red line at time 1.71

    .. code-block:: Python

        import matplotlib.pyplot as plt

        audio_dir = importlib.resources.files('phonlab') / 'data' / 'example_audio'
        example_file = audio_dir / 'sf3_cln.wav'

        x,fs = phon.loadsig(example_file,chansel=[0])
        phon.sgram(x,fs,start=1.5, end=2.0)
        plt.axvline(1.71,color="red")

    .. figure:: images/burst.png
       :scale: 50 %
       :alt: a spectrogram with a red line marking the location of the burst
       :align: center

       Marking the burst found by `phon.burst()`

    Read a file into an array `x`, track the formant frequencies in the file, use them to produce
    sine wave speech, and then plot a spectrogram of the resulting signal.

    .. code-block:: Python

        example_file = importlib.resources.files('phonlab') / 'data' / 'example_audio' / 'sf3_cln.wav'

        x,fs = phon.loadsig(example_file, chansel=[0])
        fmtsdf = phon.track_formants(x,fs)    # track the formants
        x2,fs2 = phon.sine_synth(fmtsdf)     # use the formants to produce sinewave synthesis
        ax1,f,t,Sxx = phon.sgram(x2,fs2, band="nb", preemph=0)  # plot a spectrogram of it

    .. figure:: images/sine_synth.png
       :scale: 40 %
       :alt: a spectrogram of sine-wave synthesis
       :align: center

       Showing the spectrogram of sine-wave synthesis.

    """
    target_fs = tf*2    # top frequency is the Nyquist frequency for the analysis

    # analysis window size (sec): 40 ms for narrow band, 8 ms for wide band
    w = 0.04 if band=='nb' else 0.008

    # set up parameters for the spectrogram window
    figheight = 4.5  # height in inches
    max_figwidth = 12 # maximum figure width in inches
    inches_per_sec = 6.5 # desired width scaling of printed spectrogram
    slice_width = 1.5  # how much space to give to the spectral slice
    cmap = plt.get_cmap(cmap)

    # ----------- condition waveform -----------------------
    # from here on fs is the rate reported back by prep_audio
    x2, fs = prep_audio(x,fs, target_fs = target_fs, pre = preemph, quiet=True)

    i1 = int(start * fs)   # index of starting time: seconds to samples
    i2 = int(end * fs)     # index of ending time
    if i2<0 or i2>len(x2):  # stop at the end of the waveform
        i2 = len(x2)
    if i1<0 or i1>i2:      # don't let start precede the signal or follow end
        i1 = 0
        start = 0          # keep the time axis in step with the samples actually used

    # ----------- compute the spectrogram ---------------------------------
    ts,f,Sxx = compute_sgram(x2[i1:i2],fs,w)

    # ------------ display in a matplotlib figure --------------------
    ts = np.add(ts,start)  # increment the spectrogram times by the start value
    t_min, t_max = np.min(ts), np.max(ts)
    dur = t_max - t_min + w   # scale figure size
    figwidth = min(dur * inches_per_sec, max_figwidth)

    want_slice = slice_time>0
    ax2 = None   # stays None unless this function creates the slice axes itself
    if ax is None:
        if want_slice: # if spectral slice is desired, add an axes for it
            fig = plt.figure(figsize=(figwidth+slice_width, figheight),dpi=72)
            gs = fig.add_gridspec(nrows=1, ncols=2, width_ratios=[figwidth/slice_width, 1])
            ax1 = fig.add_subplot(gs[0])
            ax2 = fig.add_subplot(gs[1])
        else:
            fig = plt.figure(figsize=(figwidth, figheight),dpi=72)
            ax1 = fig.add_subplot(111)
    else:
        ax1 = ax
        # operate on the figure that owns the axes; pyplot's "current" figure
        # may be a different one
        fig = ax1.figure
        if resize:
            fig.set_size_inches(figwidth, figheight) # resize it by the values here

    s_min, s_max = np.min(Sxx), np.max(Sxx)
    vmin = s_min + (s_max - s_min)*min_prop   # floor of the color scale
    extent = (t_min, t_max, np.min(f), np.max(f))  # time and frequency values for indices
    ax1.imshow(Sxx, aspect='auto', interpolation='nearest', cmap=cmap, vmin = vmin,
                extent = extent, origin='lower')
    ax1.grid(which='major', axis='y', linestyle='-')  # add grid lines
    ax1.set_xlabel("Time (sec)", size=font_size)
    ax1.set_ylabel("Frequency (Hz)", size=font_size)
    ax1.tick_params(labelsize=font_size)
    ax1.locator_params(axis='y', prune="upper")  # for stacking sgram with other axes
    fig.subplots_adjust(left=0, bottom=0, right=1, top=1, wspace=0, hspace=0)

    if want_slice:  # mark the slice time on the spectrogram
        ax1.axvline(x=slice_time,color='black',linestyle="--")
        if ax2 is not None:  # the slice panel exists only in a figure made here
            i = np.argmin(np.abs(ts-slice_time))  # index of the frame nearest slice_time
            spectrum = Sxx.T[i]
            ax2.plot(spectrum,f,color='black')
            ax2.grid(which='major', axis='y', linestyle=':')  # add grid lines
            ax2.set_ymargin(0)    # put y-axis at bottom and top of axis (as in spectrogram)
            ax2.tick_params(labelleft=False,labelsize=font_size)  # do not write the frequency axis labels

    if save_name:  # truthiness works for both str and Path (Path has no len())
        print(f'Saving file: {save_name}')
        fig.savefig(save_name,dpi=300,bbox_inches='tight')

    return (ax1, f,ts,Sxx)