# Source code for phonlab.acoustic.vowel_norm

import pandas as df
import numpy as np


# [docs]
def get_deltaF(df, return_value = "deltaF"):
    '''Calculate the delta F vocal tract length factor from the formant values in a dataframe **df**.  The estimate is more stable when the dataframe contains a representative set of vowels spoken by the talker (see Johnson, 2020).

Each formant column is divided by its expected position in units of deltaF (F1/0.5, F2/1.5, F3/2.5, F4/3.5), and the mean of all of the resulting values is taken, ignoring NaN entries.

Parameters
==========

df: DataFrame
    The input dataframe must contain columns for F1, F2, F3, and F4.  See phon.track_formants()

return_value: string, default = "deltaF"
    By default the `deltaF` normalization factor is returned. Normalized formant values are `Fn/deltaF`.  If you specify `return_value = "VTL"`, then the function will return the estimated vocal tract length as `VTL=35300/(2*∆F)`, where 35300 is the speed of sound (cm/sec) in warm air.  Any other value is treated the same as "deltaF".

Returns
=======

deltaF or VTL: numeric
    the quantity returned depends on the parameter `return_value`.

References
==========

K. Johnson (2020) The Delta F method of vocal tract length normalization for vowels. `Laboratory Phonology`, 11(1), 10. DOI: http://doi.org/10.5334/labphon.196

Example
=======

.. code-block:: Python

    fmtsdf = phon.track_formants(x,fs)
    VTL = phon.get_deltaF(fmtsdf,return_value='VTL')

    '''
    # one deltaF estimate per measurement: Fn / (n - 0.5) for n = 1..4
    estimates = [[df[f'F{n}'] / (n - 0.5)] for n in (1, 2, 3, 4)]
    spacing = np.nanmean(estimates)  # pooled over all formants and rows; NaNs are skipped

    if return_value != "VTL":
        return spacing
    return 35300 / (2 * spacing)



# [docs]
def deltaF_norm(df,column = None,deltaF=None):
    '''Perform vocal tract length normalization (deltaF normalization) on a dataframe of vowel formant measurements, optionally once for each speaker indicated by a grouping column.  The estimate is more stable when the dataframe contains a representative set of vowels spoken by the talker (see Johnson, 2020).

Parameters
==========

df: DataFrame
    The input dataframe must contain columns for F1, F2, F3, and F4.  See phon.track_formants().  If multiple dataframes from different talkers have been combined into a large multitalker data frame, then there should be a column identifying the speaker for each row, and the name of this column should be passed as the `column` input variable.

column: string, default=None
    If `df` contains data from more than one talker, the talker identity should be indicated in a column and the name of that column passed in this input variable.  Normalization is then done separately for each value found in that column.

deltaF: numeric or None, default=None
    Supply a value of deltaF to be used for the normalization.  By default the deltaF normalization factor is computed by the function phon.get_deltaF() over the data in the DataFrame you pass in.  This value is only used when `column` is None; when grouping by `column` a message is printed and deltaF is computed separately for each group.

Returns
=======

df: DataFrame
    The dataframe with five new columns -- normalized values of the formants 'F1/∆F', 'F2/∆F', 'F3/∆F', 'F4/∆F', and the 'deltaF' factor used for normalization.

Note
====

When `column` is None the input dataframe is modified in place and that same object is returned.  When `column` is given, the input dataframe is left unchanged and a new dataframe is returned; its index is rebuilt with `reset_index()`, so the grouping column (and the previous row index) appear as ordinary columns.

References
==========

K. Johnson (2020) The Delta F method of vocal tract length normalization for vowels. `Laboratory Phonology`, 11(1), 10. DOI: http://doi.org/10.5334/labphon.196

Example
=======

.. code-block:: Python

    fmtsdf = phon.track_formants(x,fs)
    phon.deltaF_norm(fmtsdf)  # add normalized formant columns
    fmtsdf.head()  # now there are five new columns in the dataframe

    '''
    def _norm_one(frame,deltaf=None):
        # Normalize all observations in `frame` with a single deltaF value,
        # adding the new columns to `frame` itself.
        if deltaf is None:
            deltaf = get_deltaF(frame)  # by default calculate deltaf from the data

        for n in (1, 2, 3, 4):
            frame[f'F{n}/∆F'] = frame[f'F{n}']/deltaf

        frame['deltaF'] = deltaf

        return frame

    if column is None:
        return _norm_one(df,deltaf=deltaF)

    if deltaF is not None:
        # the supplied value cannot apply to every talker, so it is reported and dropped
        print(f"Grouping by {column}, and ignoring the deltaF value {deltaF}")

    # Each group is copied before columns are added so that the group objects
    # handed out by pandas are never mutated.
    result = df.groupby(column).apply(
        lambda group: _norm_one(group.copy()), include_groups=False
    )
    return result.reset_index()



# [docs]
def resize_vt(df,deltaf):
    '''Convert deltaF-normalized formants back to non-normalized formant values using a new target deltaF value.
The normalized values are those produced by the `phonlab.deltaF_norm()` function; multiplying them by a
different deltaF simulates changing the length of the speaker's vocal tract.

Parameters
----------
df: DataFrame
    The input dataframe should contain columns labeled 'F1/∆F', 'F2/∆F', 'F3/∆F', and 'F4/∆F'.  See `phon.track_formants()` and
    `phon.deltaF_norm()`.  These should be data from a single speaker.  Any of these columns that is missing is skipped.
deltaf: float
    The new deltaF value that will be used to calculate the non-normalized values from the Fx/∆F normalized values.

Returns
-------

df: DataFrame
    The input DataFrame, with new columns called 'new_F1', 'new_F2', etc. added to it (one for each normalized column that was present).

    '''

    for n in (1, 2, 3, 4):
        normalized = f'F{n}/∆F'
        if normalized not in df.columns:
            continue  # no normalized values for this formant
        df[f'new_F{n}'] = df[normalized] * deltaf

    return df