Source code for phonlab.acoustic.vowel_norm

import pandas as df
import numpy as np

def get_deltaF(df, return_value = "deltaF"):
    '''Calculate the delta F vocal tract length factor from formant values in a dataframe **df**.

    Each formant column is divided by its position factor (F1/0.5, F2/1.5,
    F3/2.5, F4/3.5) and deltaF is the mean of all of the resulting values,
    ignoring NaN entries.  The estimate is more stable when the dataframe
    contains a representative set of vowels spoken by the talker
    (see Johnson, 2020).

    Parameters
    ==========
        df: DataFrame
            The input dataframe must contain columns for F1, F2, F3, and F4.
            See phon.track_formants()
        return_value: string, default = "deltaF"
            By default the `deltaF` normalization factor is returned.
            Normalized formant values are `Fn/deltaF`.  If you specify
            `return_value = "VTL"`, then the function will return the
            estimated vocal tract length as `VTL=35300/(2*∆F)`, where 35300
            is the speed of sound (cm/sec) in warm air.  Any other value is
            treated like the default and deltaF is returned.

    Returns
    =======
        deltaF or VTL: numeric
            the quantity returned depends on the parameter `return_value`.

    Raises
    ======
        KeyError
            if one of the columns 'F1', 'F2', 'F3', 'F4' is missing.

    References
    ==========
    K. Johnson (2020) The Delta F method of vocal tract length normalization
    for vowels. `Laboratory Phonology`, 11(1), 10.
    DOI: http://doi.org/10.5334/labphon.196

    Example
    =======

    .. code-block:: Python

        fmtsdf = phon.track_formants(x,fs)
        VTL = phon.get_deltaF(fmtsdf,return_value='VTL')

    '''
    # (column name, divisor) pairs: formant number n is divided by (n - 0.5)
    divisors = (('F1', 0.5), ('F2', 1.5), ('F3', 2.5), ('F4', 3.5))

    # one row per formant, one column per observation; all rows come from the
    # same dataframe so they always have equal length
    scaled = np.asarray(
        [np.asarray(df[name] / divisor, dtype=float) for name, divisor in divisors]
    )

    # a single grand mean over every scaled formant value, skipping NaNs
    deltaf = np.nanmean(scaled)

    if return_value == "VTL":
        return 35300/(2*deltaf)
    return deltaf
def deltaF_norm(df,column = None,deltaF=None):
    '''Perform vocal tract length normalization (deltaF normalization) of
    vowel formant measurements, optionally separately for each speaker
    indicated by a grouping column in the dataframe.

    The estimate is more stable when the dataframe contains a representative
    set of vowels spoken by the talker (see Johnson, 2020).

    Parameters
    ==========
        df: DataFrame
            The input dataframe must contain columns for F1, F2, F3, and F4.
            See phon.track_formants().  If multiple dataframes from different
            talkers have been combined into a large multitalker data frame,
            then there should be a column identifying the speaker for each
            row, and the name of this column should be passed as the `column`
            input variable.
        column: string, default=None
            If `df` contains data from more than one talker, the talker
            identity should be indicated in a column and the name of that
            column passed in this input variable.  deltaF is then computed
            separately for each group.
        deltaF: numeric or None, default=None
            Supply a value of deltaF to be used for the normalization.  By
            default the deltaF normalization factor is computed by the
            function phon.get_deltaF() over the data in the DataFrame you
            pass in.  This value is ignored (with a printed notice) when
            `column` is given.

    Returns
    =======
        result: DataFrame
            The data with five new columns -- normalized values of the
            formants 'F1/∆F', 'F2/∆F', 'F3/∆F', 'F4/∆F', and the 'deltaF'
            factor used for normalization.

            When `column` is None the input dataframe is modified in place
            and that same dataframe is returned.  When `column` is given, a
            new dataframe is returned; it is built with
            `groupby(column).apply(...)` followed by `reset_index()`, so the
            grouping column and the original row index come back as ordinary
            columns.

    References
    ==========
    K. Johnson (2020) The Delta F method of vocal tract length normalization
    for vowels. `Laboratory Phonology`, 11(1), 10.
    DOI: http://doi.org/10.5334/labphon.196

    Example
    =======

    .. code-block:: Python

        fmtsdf = phon.track_formants(x,fs)
        phon.deltaF_norm(fmtsdf)   # add normalized formant columns
        fmtsdf.head()   # now there are five new columns in the dataframe

    '''

    def _norm_one(df,deltaf=None):
        # this function normalizes based on all observations in the df
        # use it in a groupby().apply() call to do once for each speaker
        if deltaf is None:
            deltaf = get_deltaF(df)  # by default calculate deltaf from the data
        df['F1/∆F'] = df['F1']/deltaf
        df['F2/∆F'] = df['F2']/deltaf
        df['F3/∆F'] = df['F3']/deltaf
        df['F4/∆F'] = df['F4']/deltaf
        df['deltaF'] = deltaf
        return df

    if column is None:
        # single-talker case: add the columns directly to the caller's dataframe
        return _norm_one(df,deltaf=deltaF)

    if deltaF is not None:
        # the message must use the outer parameter `deltaF`; `deltaf` only
        # exists inside _norm_one
        print(f"Grouping by {column}, and ignoring the deltaF value {deltaF}")

    # work on a copy of each group so that the helper never adds columns to
    # the objects pandas hands to the applied function
    result = df.groupby(column).apply(
        lambda group: _norm_one(group.copy()), include_groups=False
    )
    return result.reset_index()
def resize_vt(df,deltaf):
    '''Convert deltaF-normalized formants back to non-normalized values using
    a new target deltaF, which simulates changing the length of the speaker's
    vocal tract.

    The normalized values are those produced by the `phonlab.deltaF_norm()`
    function.

    Parameters
    ----------
    df: DataFrame
        The input dataframe must contain columns labeled 'F1/∆F', 'F2/∆F',
        'F3/∆F', and 'F4/∆F'.  See `phon.track_formants()` and
        `phon.deltaF_norm()`.  These should be data from a single speaker.
        Any of these columns that is absent is skipped.

    deltaf: float
        The new deltaF value that will be used to calculate the
        non-normalized values from the Fx/∆F normalized values.

    Returns
    -------
    df: DataFrame
        New columns called 'new_F1', 'new_F2', etc. are added to the input
        DataFrame, which is modified in place and returned.

    '''
    # normalized source column -> name of the rescaled output column
    targets = {
        'F1/∆F': 'new_F1',
        'F2/∆F': 'new_F2',
        'F3/∆F': 'new_F3',
        'F4/∆F': 'new_F4',
    }
    for normalized, rescaled in targets.items():
        if normalized not in df.columns:
            continue  # only rescale the formants that are present
        df[rescaled] = df[normalized] * deltaf
    return df