Source code for phonlab.auditory.add_noise_

import numpy as np
import matplotlib.pyplot as plt
import random
import colorednoise as cn
import librosa
from importlib.resources import files as res_files
from ..utils.prep_audio_ import prep_audio


[docs]
def peak_rms(y):
    """
Return the peak rms amplitude

The function uses the librosa.feature.rms to calculate an RMS contour from short time Fourier transforms taken from windows of 2048 samples with a step of 512 samples (the librosa.stft defaults).  This makes for different window lengths (in terms of seconds) depending on the sampling rate.  

Parameters
==========
    y : ndarray
        a one-dimensional array of audio waveform samples

Returns
=======
    float
        the maximum rms value in y

    """
    
    S = np.abs(librosa.stft(y))
    rms = librosa.feature.rms(S = S)

    return np.max(rms)



[docs]
def add_noise(x, fs, noise_type="white", snr = 0, target_amp = -2):
    """
Add noise to audio

This function is partially adapted from matlab code written by Kamil Wojcicki, UTD, July 2011. It does the following:

    * pads the audio signal with 1/2 second of silence at the beginning and end
    * takes an audio file and mixes it with a noise (or a passed audio file) at a specified signal to noise ratio.
    * scales the peak intensity of the resulting mixed audio to prevent clipping
    * writes the resulting mixed audio as .wav files to an output directory


Parameters
==========
x : array
    A one-dimensional array of audio samples
fs : int
    the sampling frequency of the audio in **x**
   
noise_type : string, default = "white"
        The type of noise - one of "pink", "white", "brown", 'babble', 'party', or 'restaurant'.
        
snr : float, default = 0
        the signal to noise ratio in dB.  0 means that the signal peak RMS amplitude will be the same as the noise amplitude. Less than zero (e.g. -5) means that the signal amplitude will be lower than the noise, and greater than zero means that the signal amplitude will be greater than the noise amplitude.
        
target_amp : number, default = -2
        Scale the resulting signal (the result of adding the noise to the signal) so that the peak amplitude is target_amp relative to the maximum possible value.  Use a negative number to avoid clipping.  -2 means scale the resulting signal so that it is -2 dB below the maximum for digital audio files.

Returns
=======
    y : ndarray
        The result of adding noise to the signal
    fs : int
        The sampling rate of the signal in **y**

Raises
======
    ValueError 
        if the noise_type is not a valid type


Example
=======
This example adds white noise at a signal-to-noise ratio (SNR) of 3 dB

.. code-block:: Python

     x,fs = phon.loadsig("sf3_cln.wav",chansel=[0])
     y,fs = phon.add_noise(x,fs,"white",snr=3)
     phon.sgram(x,fs)

.. figure:: images/add_noise.png
   :scale: 90 %
   :alt: a spectrogram a speech sample buried in white noise
   :align: center

   The result of adding white noise.

    """
    # Valid options that can be passed to the `sox` `synth` effect.
    colored_noise = (
        'brown', 'pink', 'white'
    )
    # Names of files in the package data/noise directory. 
    pkg_noise = (
        'babble', 'party', 'restaurant'
    )

    signal_peak = peak_rms(x)
    
    pad = np.zeros(int(fs/2))  # number of points in 1/2 a second
    x = np.append(np.append(pad,x),pad) #add 500 ms of silence before/after signal, 
            # the stimulus will begin 500 ms after the onset of the noise after

    if noise_type in colored_noise:
        if (noise_type == 'pink'):
            beta = 1 # the exponent for pink noise
        elif (noise_type == 'white'):
            beta = 0 
        elif (noise_type == 'brown'):
            beta = 2
        noise_rate = fs  #sampling rate of the signal 
        noise = cn.powerlaw_psd_gaussian(beta, len(x))  #generate the noise samples
 
    elif noise_type in pkg_noise:  # noise is an audiofile
        noise_file = res_files('phonlab') / 'data' / 'noise' / f'{noise_type}.wav'
        noise, noise_rate = librosa.load(noise_file, sr = fs)  # resample to the rate of the signal
        
        #get length of signal and noise files
        s = len( x )
        n = len( noise )
    
        while ( s > n ):  # noise must be longer than signal
            noise = np.concatenate([noise,noise])  # rude way to grow the noise sample by doubling
            n= len(noise)
    
        # generate a random start location in the noise signal to extract a random section of it 
        r = random.randint(1,1+n-s)
        noise = noise[r:r+s]
    else:
        print(f"{noise_type} must be one of 'pink', 'white', 'brown', 'babble', 'party', or 'restaurant'")
        exit()
        
    noise_peak = peak_rms(noise)

    # scale the noise file w.r.t. to target at desired SNR level (arrays must be the same length)
    noise = noise / noise_peak * signal_peak / np.power(10.0, snr/20) # peak amp
    # or noise = noise / np.linalg.norm(noise) * np.linalg.norm(signal) / np.power(10.0,snr/20)  # whole file (Wojcicki)

    # mix the noise and audio files 
    mixed_audio = x + noise
    
    # calculate the gain needed to scale to the desired peak RMS level (-3dB usually, below max)
    current_peak = np.max(np.abs(mixed_audio))
    gain = np.power(10.0, target_amp/20.0) / current_peak
    
    return gain * mixed_audio, fs