import datetime as dt
import logging
import numpy as np
import pandas as pd
__all__ = ['make_timeseries', 'clean_convert']
[docs]def make_timeseries(x=None, year=None, length=None, startdate=None, freq=None):
"""Convert numpy array to a pandas series with a timed index. Convenience wrapper around a datetime-indexed pd.DataFrame.
Parameters:
x: (nd.array) raw data to wrap into a pd.Series
startdate: pd.datetime
year: year of timeseries
freq: offset keyword (e.g. 15min, H)
length: length of timeseries
Returns:
pd.Series or pd.Dataframe with datetimeindex
"""
if startdate is None:
if year is None:
logging.info('No info on the year was provided. Using current year')
year = dt.datetime.now().year
startdate = dt.datetime(year, 1, 1, 0, 0, 0)
if x is None:
if length is None:
raise ValueError('The length or the timeseries has to be provided')
else: # if x is given
length = len(x)
if freq is None:
# Shortcuts: Commonly used frequencies are automatically assigned
if len(x) == 8760 or len(x) == 8784:
freq = 'H'
elif len(x) == 35040:
freq = '15min'
elif len(x) == 12:
freq = 'm'
else:
raise ValueError('Input vector length must be 12, 8760 or 35040. Otherwise freq has to be defined')
#enddate = startdate + pd.datetools.timedelta(seconds=_freq_to_sec(freq) * (length - 1) )
date_list = pd.date_range(start=startdate, periods=length, freq=freq)
if x is None:
return pd.Series(np.nan, index=date_list)
elif isinstance(x, (pd.DataFrame, pd.Series)):
x.index = date_list
return x
elif isinstance(x, (np.ndarray, list)):
if len(x.shape) > 1:
return pd.DataFrame(x, index=date_list)
else:
return pd.Series(x, index=date_list)
else:
raise ValueError('Unknown type of data passed')
def _freq_to_sec(freq_keyword):
""" Converts pandas frequency string keyword to seconds. Not all frequency offsets can be converted to seconds.
Arguments:
freq_keyword: frequency based on pandas offsets
Returns:
int: corresponding seconds """
from pandas.tseries.frequencies import to_offset
try:
return to_offset(freq_keyword).nanos * 1E-9
except ValueError as e:
raise ValueError('Works only with fixed frequencies e.g. h,s,t', e)
def human_readable_time(delta, terms=1):
"""Convert hours to human readable string
Arguments:
delta: time in seconds
terms: how many word terms to use, to describe the timestep
Returns
str: Human readable string
"""
# Inspired by http://stackoverflow.com/questions/26164671/convert-seconds-to-readable-format-time
from dateutil.relativedelta import relativedelta
intervals = ['years', 'months', 'days', 'hours', 'minutes', 'seconds']
if delta > 31:
delta = delta
rd = relativedelta(hours=delta)
out = ""
for k in intervals[:terms]:
if getattr(rd, k):
out += '{} {} '.format(getattr(rd, k), k)
return out.strip()
[docs]def clean_convert(x, force_timed_index=True, always_df=False, **kwargs):
"""Converts a list, a numpy array, or a dataframe to pandas series or dataframe, depending on the
compatibility and the requirements. Designed for maximum compatibility.
Arguments:
x (list, np.ndarray): Vector or matrix of numbers. it can be pd.DataFrame, pd.Series, np.ndarray or list
force_timed_index (bool): if True it will return a timeseries index
year (int): Year that will be used for the index
always_df (bool): always return a dataframe even if the data is one dimensional
**kwargs: Exposes arguments of :meth:`make_timeseries`
Returns:
pd.Series: Timeseries
"""
if isinstance(x, list): # nice recursions
return clean_convert(pd.Series(x), force_timed_index, always_df, **kwargs)
elif isinstance(x, np.ndarray):
if len(x.shape) == 1:
return clean_convert(pd.Series(x),force_timed_index, always_df, **kwargs)
else:
return clean_convert(pd.DataFrame(x), force_timed_index, always_df, **kwargs)
elif isinstance(x, pd.Series):
if always_df:
x = pd.DataFrame(x)
if x.index.is_all_dates:
return x
else: # if not datetime index
if force_timed_index:
logging.debug('Forcing Datetimeindex into passed timeseries.'
'For more accurate results please pass a pandas time-indexed timeseries.')
return make_timeseries(x, **kwargs)
else: # does not require datetimeindex
return x
elif isinstance(x, pd.DataFrame):
if x.shape[1] == 1 and not always_df:
return clean_convert(x.squeeze(), force_timed_index, always_df, **kwargs)
else:
if force_timed_index and not x.index.is_all_dates:
return make_timeseries(x, **kwargs)
else: # does not require datetimeindex
return x
else:
raise ValueError('Unrecognized Type. Has to be one of the following: pd.DataFrame, pd.Series, np.ndarray or list')