# Source code for phonlab.utils.prep_audio_

# Public API of this module: only `prep_audio` is exported by a star-import.
__all__=['prep_audio']

import numpy as np
from scipy.signal import resample_poly



# [docs]
def prep_audio(x, fs, target_fs=32000, pre=0, scale=True,
               add_tiny_noise=True, outtype="float", pad_to=0.0,
               quiet=False):
    """ Prepare an array of audio waveform samples for acoustic analysis.

The processing steps are applied in this order: resampling, polarity
normalization, pre-emphasis, peak scaling, addition of low-level noise,
padding, and conversion to the requested output type.

Parameters
==========
    x : array
        a one-dimensional numpy array with audio samples in it.

    fs : int
        The sampling rate of the sound in **x**.

    target_fs : int, default=32000
        The desired sampling rate of the audio samples that will be returned by the function.
        Set **target_fs = None** if you don't want to change the sampling rate.

    pre : float, default = 0
        how much high frequency preemphasis to apply (between 0 and 1).

    scale: boolean, default = True
        scale the samples so that the largest absolute amplitude in the signal is 0.9.
        A signal whose samples are all zero is left unscaled.

    add_tiny_noise: boolean, default = True
        add a tiny bit of noise to the audio to avoid problematic waveforms with many samples at zero amplitude.

    outtype : string {"float", "int", "int16"}, default = "float"
        With "int" (or "int16") the waveform is converted to 16 bit integers; samples are
        clipped to [-1, 1] and multiplied by 32767 before rounding.
        With "float" the samples are returned as computed, with no type conversion.

    pad_to: float, default = 0.0
        add samples so duration is a multiple of `pad_to`. For example, if the duration is 1.99 seconds
        and `pad_to` is 0.1 then the signal will be padded to 2.0 seconds.  Nothing is added if the
        duration is already a multiple of `pad_to`.  The added samples are very low amplitude noise.

    quiet: boolean, default = False
        if True, do not print messages about resampling and padding.


Returns
=======
    y : ndarray
        a 1D numpy array with audio samples

    fs : int
        the sampling rate of the audio in **y**.

Raises
======
    ValueError
        if **x** contains no samples.

Note
====
By default, this function will return audio with a sampling rate of 32 kHz and scaled so
that the peak amplitude is 0.9 (i.e. within the range [-1, 1]).

Example
=======
Open a sound file and prepare it for acoustic analysis.  By default, prep_audio() will
resample the audio to a sampling rate of 32000, and scale the waveform to use the full range.
In this example, we have also asked the function to apply a preemphasis factor of 1 (about 6dB/octave).

.. code-block:: Python

    y,fs = phon.loadsig("sound.wav",chansel=[0])
    x,fs = phon.prep_audio(y, fs, pre=1)

Take the right channel, and resample to 16,000 Hz

.. code-block:: Python

    *chans,fs = phon.loadsig("sound.wav")
    print(f'the old sampling rate is: {fs}')
    y,fs = phon.prep_audio(chans[1],fs, target_fs=16000)
    print(f'the new sampling rate is: {fs}')

    """
    x = np.asarray(x)
    if x.size == 0:
        raise ValueError("prep_audio: 'x' contains no audio samples")

    if target_fs is None:
        target_fs = fs

    if target_fs == fs:
        x2 = x
    else:  # resample to 'target_fs' samples per second
        if not quiet:
            print(f'Prep Audio: Resampling from {fs} to {target_fs}')
        # resample_poly needs integer up/down factors; reduce them by the
        # greatest common divisor to keep the polyphase filter small.
        src_rate, dst_rate = int(fs), int(target_fs)
        cd = int(np.gcd(src_rate, dst_rate))
        x2 = resample_poly(x, up=dst_rate // cd, down=src_rate // cd)

    # Set the polarity of the signal so the larger excursion is positive.
    # The sum is formed from Python floats so that narrow integer input
    # cannot overflow in the comparison.
    if float(np.max(x2)) + float(np.min(x2)) < 0:
        x2 = -x2

    if pre > 0:  # apply pre-emphasis (first-order difference), first sample unchanged
        y = np.concatenate((x2[:1], x2[1:] - pre * x2[:-1]))
    else:
        y = x2

    if scale:
        # Use the absolute peak: after pre-emphasis the negative peak can be
        # larger than the positive one.  Skip silent signals to avoid 0/0.
        peak = np.max(np.abs(y))
        if peak > 0:
            y = y / peak * 0.9  # scale to about full range

    if add_tiny_noise:
        y = y + ((np.random.rand(len(y)) - 0.5) * 0.00001)

    if pad_to > 0:
        # Work in whole samples so float rounding cannot add a spurious
        # full block or leave the result one sample short.
        block_len = int(round(pad_to * target_fs))
        extra_samples = (-len(y)) % block_len if block_len > 0 else 0
        if extra_samples > 0:
            filler = (np.random.rand(extra_samples) - 0.5) * 0.00001
            y = np.concatenate((y, filler))
        if not quiet:
            print(f"Prep Audio: Padding signal to {pad_to} sec, which involves adding {extra_samples} extra samples.")

    if outtype in ("int", "int16"):
        # Clip first so out-of-range samples saturate instead of wrapping.
        full_scale = np.iinfo(np.int16).max
        y = np.rint(full_scale * np.clip(y, -1.0, 1.0)).astype(np.int16)

    return y, target_fs