Source code for pecos.utils

"""
The utils module contains helper functions.
"""
import numpy as np
import pandas as pd
import re
import logging

logger = logging.getLogger(__name__)

[docs]def index_to_datetime(index, unit='s', origin='unix'):
    """
    Convert DataFrame index from int/float to datetime,
    rounds datetime to the nearest millisecond
    
    Parameters
    --------------
    index : pandas Index
        DataFrame index in int or float 
    
    unit : str, optional
        Units of the original index
    
    origin : str
        Reference date used to define the starting time.
        If origin = 'unix', the start time is '1970-01-01 00:00:00'
        The origin can also be defined using a datetime string in a similar 
        format (i.e. '2019-05-17 16:05:45')
        
    Returns
    ----------
    pandas Index
        DataFrame index in datetime
    """
    
    index2 = pd.to_datetime(index, unit=unit, origin=origin)
    index2 = index2.round('ms') # round to nearest milliseconds
        
    return index2

[docs]def datetime_to_elapsedtime(index, origin=0.0):
    """
    Convert DataFrame index from datetime to elapsed time in seconds
    
    Parameters
    --------------
    index : pandas Index
        DataFrame index in datetime
    
    origin : float
        Reference for elapsed time
    
    Returns
    ----------
    pandas Index
        DataFrame index in elapsed seconds
    """

    index2 = index - index[0]
    index2 = index2.total_seconds() + origin
    
    return index2

[docs]def datetime_to_clocktime(index):
    """
    Convert DataFrame index from datetime to clocktime (seconds past midnight)
    
    Parameters
    --------------
    index : pandas Index
        DataFrame index in datetime
    
    Returns
    ----------
    pandas Index
        DataFrame index in clocktime
    """
    
    clocktime = index.hour*3600 + index.minute*60 + index.second + index.microsecond/1e6
    
    return clocktime
    
[docs]def datetime_to_epochtime(index):
    """
    Convert DataFrame index from datetime to epoch time
    
    Parameters
    --------------
    index : pandas Index
        DataFrame index in datetime
    
    Returns
    ----------
    pandas Index
        DataFrame index in epoch time
    """
    
    index2 = index.astype('int64')/10**9
    
    return index2

[docs]def round_index(index, frequency, how='nearest'):
    """
    Round DataFrame index
    
    Parameters
    ----------
    index : pandas Index
        Datetime index
    
    frequency : int
        Expected time series frequency, in seconds
    
    how : string, optional
        Method for rounding, default = 'nearest'.  Options include:
        
        * nearest = round the index to the nearest frequency
        * floor = round the index to the smallest expected frequency
        * ceiling = round the index to the largest expected frequency 
        
    Returns
    -------
    pandas Index
        DataFrame index with rounded values
    """

    window_str=str(int(frequency*1e3)) + 'ms' # milliseconds
    
    if how=='nearest':
        rounded_index = index.round(window_str)
    elif how=='floor':
        rounded_index = index.floor(window_str)
    elif how=='ceiling':
        rounded_index = index.ceil(window_str)
    else:
        logger.info("Invalid input, index not rounded")
        rounded_index = index

    return rounded_index


[docs]def evaluate_string(string_to_eval, data=None, trans=None, specs=None, col_name='eval'):
    """
    Returns an evaluated Python string.  WARNING this function calls 'eval'. 
    Strings of Python code should be thoroughly tested by the user.
    
    This function can be useful when defining quality control configuration 
    options in a file, such as:
    
    * Time filters that depend on the data index
    * Quality control bounds that depend on system constants
    * Composite signals that are defined using existing data
    
    For each {keyword} in string_to_eval, {keyword} is expanded in the following order:
    
    * If keyword is ELAPSED_TIME, CLOCK_TIME or EPOCH_TIME then data.index is 
      converted to seconds (elapsed time, clock time, or epoch time) and used 
      in the evaluation (requires data)
    * If keyword is used to select a column (or columns) of data, then 
      data[keyword] is used in the evaluation (requires data) 
    * If a translation dictionary is used to select a column (or columns) of data, then 
      data[trans[keyword]] is used in the evaluation (requires data and trans) 
    * If the keyword is a key in a dictionary of constants, specs, then 
      specs[keyword] is used in the evaluation (requires specs)

    Parameters
    ----------
    string_to_eval : string
        String to evaluate, the string can included multiple keywords and 
        numpy (np.*) and pandas (pd.*) functions
		
    data : pandas DataFrame, optional
        Data, indexed by datetime
		
    trans: dictionary, optional
        Translation dictionary
		
    specs : dictionary, optional
        Keyword:value pairs used to define constants
		
    col_name : string, optional
        Column name used in the returned DataFrame.  If the DataFrame has more 
        than one column, columns are named col_name 0, col_name 1, ...
        
    Returns
    --------
    pandas DataFrame or float
        Evaluated string
    """

    if not isinstance(string_to_eval, str):
        return string_to_eval
    
    match = re.findall(r"\{(.*?)\}", string_to_eval)
    for m in set(match):
        m = m.replace('[','') # check for list

        if m == 'ELAPSED_TIME':
            ELAPSED_TIME = datetime_to_elapsedtime(data.index)
            ELAPSED_TIME = pd.Series(ELAPSED_TIME, index=data.index)
            string_to_eval = string_to_eval.replace("{"+m+"}",m)
        elif m == 'CLOCK_TIME':
            CLOCK_TIME = datetime_to_clocktime(data.index)
            CLOCK_TIME = pd.Series(CLOCK_TIME, index=data.index)
            string_to_eval = string_to_eval.replace("{"+m+"}",m)
        elif m == 'EPOCH_TIME':
            EPOCH_TIME = datetime_to_epochtime(data.index)
            EPOCH_TIME = pd.Series(EPOCH_TIME, index=data.index)
            string_to_eval = string_to_eval.replace("{"+m+"}",m)
        else:
            try:
                data[m]
                datastr = "data[['" + m + "']]" # dataframe
                string_to_eval = string_to_eval.replace("{"+m+"}",datastr)
            except:
                try:
                    data[trans[m]]
                    datastr = "data[trans['" + m + "']]"
                    string_to_eval = string_to_eval.replace("{"+m+"}",datastr)
                except:
                    try:
                        specs[m]
                        datastr = "specs['" + m + "']"
                        string_to_eval = string_to_eval.replace("{"+m+"}",datastr)
                    except:
                        pass

    try:
        signal = eval(string_to_eval)
        
        # Convert Series and tuple of Series to DataFrame
        if isinstance(signal, pd.Series): # Series
            signal = signal.to_frame(0)
        elif isinstance(signal, tuple): # A tuple of series
            signal = pd.DataFrame(signal).T
        
        assert isinstance(signal, (pd.DataFrame, int, float))
        
        # If DataFrame, update column names
        if isinstance(signal, pd.DataFrame): 
            if len(signal.columns) == 1:
                signal.columns = [col_name]
            else:
                signal.columns = [col_name + " " + str(i) for i in range(len(signal.columns))]
        
    except:
        signal = None

    return signal