import numpy as np
import matplotlib.pyplot as plt
import random
import colorednoise as cn
import librosa
from importlib.resources import files as res_files
from ..utils.prep_audio_ import prep_audio
[docs]
def peak_rms(y):
    """
    Find the largest short-time RMS amplitude in a waveform.

    An RMS contour is derived with librosa.feature.rms from the magnitude
    of a short time Fourier transform of **y**. The transform uses the
    librosa.stft defaults (windows of 2048 samples, stepped by 512
    samples), so the duration of each analysis window in seconds varies
    with the sampling rate of the audio.

    Parameters
    ==========
    y : ndarray
        a one-dimensional array of audio waveform samples

    Returns
    =======
    float
        the maximum value found in the RMS contour of y
    """
    magnitudes = np.abs(librosa.stft(y))
    contour = librosa.feature.rms(S=magnitudes)
    return contour.max()
[docs]
def add_noise(x, fs, noise_type="white", snr=0, target_amp=-2):
    """
    Add noise to audio

    This function is partially adapted from matlab code written by Kamil
    Wojcicki, UTD, July 2011. It does the following:

    * pads the audio signal with 1/2 second of silence at the beginning and end
    * mixes the padded signal with a generated noise (or with a random
      excerpt of one of the noise recordings shipped with the package) at a
      specified signal to noise ratio
    * scales the peak amplitude of the resulting mixed audio to prevent clipping
    * returns the mixed audio and its sampling rate (nothing is written to disk)

    Parameters
    ==========
    x : array
        A one-dimensional array of audio samples
    fs : int
        the sampling frequency of the audio in **x**
    noise_type : string, default = "white"
        The type of noise - one of "pink", "white", "brown", 'babble',
        'party', or 'restaurant'.
    snr : float, default = 0
        the signal to noise ratio in dB. 0 means that the signal peak RMS
        amplitude will be the same as the noise peak RMS amplitude. Less
        than zero (e.g. -5) means that the signal amplitude will be lower
        than the noise, and greater than zero means that the signal
        amplitude will be greater than the noise amplitude.
    target_amp : number, default = -2
        Scale the resulting signal (the result of adding the noise to the
        signal) so that the peak absolute amplitude is target_amp dB
        relative to full scale (1.0). Use a negative number to avoid
        clipping. -2 means scale the resulting signal so that it is -2 dB
        below the maximum for digital audio files.

    Returns
    =======
    y : ndarray
        The result of adding noise to the signal. It is one second longer
        than **x** because of the added padding.
    fs : int
        The sampling rate of the signal in **y**

    Raises
    ======
    ValueError
        if the noise_type is not a valid type

    Example
    =======
    This example adds white noise at a signal-to-noise ratio (SNR) of 3 dB

    .. code-block:: Python

        x,fs = phon.loadsig("sf3_cln.wav",chansel=[0])
        y,fs = phon.add_noise(x,fs,"white",snr=3)
        phon.sgram(y,fs)

    .. figure:: images/add_noise.png
        :scale: 90 %
        :alt: a spectrogram a speech sample buried in white noise
        :align: center

        The result of adding white noise.
    """
    # Power-law exponent (beta) handed to colorednoise for each synthetic noise.
    colored_noise = {'white': 0, 'pink': 1, 'brown': 2}
    # Names of files in the package data/noise directory.
    pkg_noise = ('babble', 'party', 'restaurant')

    # Reject bad input up front, and as an exception rather than exit(), so
    # that a caller is never terminated by a typo in noise_type.
    if noise_type not in colored_noise and noise_type not in pkg_noise:
        raise ValueError(
            f"{noise_type} must be one of 'pink', 'white', 'brown', 'babble', 'party', or 'restaurant'"
        )

    # Measure the signal level before padding it with silence.
    signal_peak = peak_rms(x)

    # Add 500 ms of silence before and after the signal, so the stimulus
    # begins 500 ms after the onset of the noise.
    pad = np.zeros(int(fs / 2))  # number of points in 1/2 a second
    x = np.concatenate((pad, np.ravel(x), pad))
    n_signal = len(x)

    if noise_type in colored_noise:
        # Synthesize exactly as many noise samples as the padded signal has.
        noise = cn.powerlaw_psd_gaussian(colored_noise[noise_type], n_signal)
    else:
        # The noise is an audio file; resample it to the rate of the signal.
        noise_file = res_files('phonlab') / 'data' / 'noise' / f'{noise_type}.wav'
        noise, _ = librosa.load(noise_file, sr=fs)
        n_noise = len(noise)
        if n_noise < n_signal:
            # Repeat the recording until it is at least as long as the signal.
            repeats = -(-n_signal // n_noise)  # ceiling division
            noise = np.tile(noise, repeats)
            n_noise = len(noise)
        # Pick a random excerpt. random.randint includes both end points, so
        # the last legal start offset is n_noise - n_signal; anything larger
        # would yield an excerpt shorter than the signal.
        start = random.randint(0, n_noise - n_signal)
        noise = noise[start:start + n_signal]

    noise_peak = peak_rms(noise)
    # Scale the noise so that its peak RMS sits snr dB below the signal's
    # peak RMS (the arrays are the same length at this point).
    noise = noise / noise_peak * signal_peak / np.power(10.0, snr / 20)

    # mix the noise and audio
    mixed_audio = x + noise

    # Gain that puts the largest absolute sample at target_amp dB re 1.0.
    current_peak = np.max(np.abs(mixed_audio))
    if current_peak == 0:
        # A completely silent mix cannot be normalized (the gain would be
        # infinite and the output NaN), so return it unscaled.
        return mixed_audio, fs
    gain = np.power(10.0, target_amp / 20.0) / current_peak
    return gain * mixed_audio, fs