from __future__ import division
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import pandas as pd
import numpy as np
from .analysis import reshape_timeseries, clean_convert, get_LDC
# Names exported by a star-import of this module.
__all__ = ['plot_heatmap', 'plot_3d', 'plot_percentiles', 'plot_rug', 'plot_boxplot', 'plot_LDC' ]
def plot_heatmap(Load, x='dayofyear', y='hour', aggfunc='sum', bins=8,
                 figsize=(16, 6), edgecolors='none', cmap='Oranges', colorbar=True, ax=None, **pltargs):
    """Draw a 2D heatmap of the timeseries reshaped along ``x`` and ``y``.

    Arguments:
        Load: 1D pandas with timed index
        x: Parameter for :meth:`enlopy.analysis.reshape_timeseries`
        y: Parameter for :meth:`enlopy.analysis.reshape_timeseries`
        aggfunc: Aggregation forwarded to :meth:`enlopy.analysis.reshape_timeseries`
        bins: Number of bins (discrete colours) for the colormap
        figsize: Size of the figure created when ``ax`` is None; unused otherwise
        edgecolors: colour of edges around individual squares. 'none' or 'w' is recommended.
        cmap: colormap name (from colorbrewer, matplotlib etc.)
        colorbar (bool): If True a colorbar is attached to the heatmap
        ax: Existing matplotlib axes to draw on. If None a new figure and axes are created.
        **pltargs: Forwarded to :meth:`matplotlib.axes.Axes.pcolor`

    Returns:
        None. The heatmap is drawn on ``ax`` (or on the newly created axes).
    """
    x_y = reshape_timeseries(Load, x=x, y=y, aggfunc=aggfunc)
    if ax is None:
        fig, ax = plt.subplots(figsize=figsize)
    else:
        # A caller-supplied axes already belongs to a figure; fetch it so the
        # colorbar below has a figure to attach to.
        fig = ax.get_figure()

    # plt.get_cmap accepts the number of lookup-table entries, which gives the
    # requested number of discrete colour bins.
    cmap_obj = plt.get_cmap(cmap, bins)
    heatmap = ax.pcolor(x_y, cmap=cmap_obj, edgecolors=edgecolors, **pltargs)
    if colorbar:
        fig.colorbar(heatmap, ax=ax)

    # Clip the axes to the extent of the reshaped table.
    ax.set_xlim(right=len(x_y.columns))
    ax.set_ylim(top=len(x_y.index))
    ax.set_xlabel(x)
    ax.set_ylabel(y)
def plot_3d(Load, x='dayofyear', y='hour', aggfunc='sum', bins=15,
            cmap='Oranges', colorbar=True, **pltargs):
    """Draw a 3D surface of the timeseries reshaped along ``x`` and ``y``.

    Arguments:
        Load: 1D pandas with timed index
        x: Parameter for :meth:`enlopy.analysis.reshape_timeseries`
        y: Parameter for :meth:`enlopy.analysis.reshape_timeseries`
        aggfunc: Aggregation forwarded to :meth:`enlopy.analysis.reshape_timeseries`
        bins: Number of bins (discrete colours) for the colormap
        cmap: colormap name (from colorbrewer, matplotlib etc.)
        colorbar (bool): If True a colorbar is attached to the surface
        **pltargs: Forwarded to ``Axes3D.plot_surface``. ``cmap``, ``rstride``,
            ``cstride``, ``shade``, ``antialiased`` and ``lw`` are already set
            by this function and must not be repeated here.

    Returns:
        None. The surface is drawn on a newly created 12x6 figure.
    """
    import mpl_toolkits.mplot3d  # noqa: F401 -- registers projection='3d' on older matplotlib

    x_y = reshape_timeseries(Load, x=x, y=y, aggfunc=aggfunc)
    n_cols = len(x_y.columns)
    n_rows = len(x_y.index)

    fig = plt.figure(figsize=(12, 6))
    ax = fig.add_subplot(111, projection='3d')

    # plt.get_cmap accepts the number of lookup-table entries, which gives the
    # requested number of discrete colour bins.
    cmap_obj = plt.get_cmap(cmap, bins)
    X, Y = np.meshgrid(range(n_cols), range(n_rows))
    surf = ax.plot_surface(X, Y, x_y, cmap=cmap_obj, rstride=1, cstride=1,
                           shade=False, antialiased=True, lw=0, **pltargs)
    if colorbar:
        fig.colorbar(surf)

    # Set viewpoint (azimuth is left at matplotlib's default).
    ax.elev = 45
    ax.auto_scale_xyz([0, n_cols],
                      [0, n_rows],
                      [0, x_y.max().max()])
    ax.set_xlabel(x)
    ax.set_ylabel(y)
def plot_percentiles(Load, x='hour', zz='week', perc_list=[[5, 95], [25, 75], 50], ax=None, color='blue', **kwargs):
    """Plot predefined percentiles per timestep.

    Arguments:
        Load: 1D pandas with timed index
        x (str): x axis aggregator. See :meth:`enlopy.analysis.reshape_timeseries`
        zz (str): similar to above for y axis
        perc_list (list): Percentiles to plot. A scalar (or one-item list) is
            drawn as a black line. A two-item list is drawn as a band between
            the two percentiles using fill_between(). The default list is only
            read, never modified.
        ax: Existing matplotlib axes to draw on. If None the current axes are used.
        color: Fill colour of the percentile bands
        **kwargs: Forwarded to :meth:`matplotlib.axes.Axes.fill_between`. Values
            given here (e.g. ``lw``, ``alpha``) override the built-in band style.

    Returns:
        None. The percentiles are drawn on ``ax``.

    Raises:
        ValueError: If an item of ``perc_list`` has more than two elements.
    """
    if ax is None:  # Hack for nice jupyter notebook compatibility
        ax = plt.gca()
    a = reshape_timeseries(Load, x=x, y=zz, aggfunc='mean')
    xx = a.columns.values
    values = a.values

    # Built-in band style; caller-supplied kwargs take precedence instead of
    # clashing with these keywords.
    band_style = {'lw': .5, 'alpha': .3, 'color': color}
    band_style.update(kwargs)

    # TODO: s 2s 3s instead of percentiles
    for item in perc_list:
        q = np.atleast_1d(item)
        if len(q) == 1:
            # One percentile per column of the reshaped table, NaNs ignored.
            line = np.nanpercentile(values, q[0], axis=0)
            ax.plot(xx, line, color='black')
        elif len(q) == 2:
            lower = np.nanpercentile(values, q[0], axis=0)
            upper = np.nanpercentile(values, q[1], axis=0)
            ax.fill_between(xx, lower, upper, **band_style)
        else:
            raise ValueError('List items should be scalars or 2-item lists')

    ax.set_xlim(left=min(xx), right=max(xx))
    ax.set_xlabel(x)
def plot_boxplot(Load, by='day', **pltargs):
    """Draw one box per weekday or per hour of the day.

    Arguments:
        Load (pd.Series): 1D pandas Series with timed index
        by (str): group results by 'day' or 'hour'
        **pltargs (dict): Forwarded to :meth:`matplotlib.pyplot.boxplot`

    Returns:
        None. The boxplot is drawn on the current axes.

    Raises:
        NotImplementedError: If ``by`` is neither 'day' nor 'hour'.
    """
    Load = clean_convert(Load, force_timed_index=True)

    if by == 'day':
        keys = Load.index.weekday
        names = "Mon Tue Wed Thu Fri Sat Sun".split()
    elif by == 'hour':
        keys = Load.index.hour
        names = np.arange(0, 24)
    else:
        raise NotImplementedError('Only "day" and "hour" are implemented')

    # Build the labels from the groups actually present, so that data which
    # does not cover every weekday/hour still gets one label per box.
    groups = []
    labels = []
    for key, values in Load.groupby(keys):
        groups.append(values)
        labels.append(names[key])

    plt.boxplot(groups, labels=labels, **pltargs)
    # TODO : Generalize to return monthly, hourly etc.
    # TODO Is it really needed? pd.boxplot()
def plot_LDC(Load, stacked=True, x_norm=True, y_norm=False, cmap='Spectral', color='black',
             legend=False, zoom_peak=False, ax=None, **kwargs):
    """Plot Load duration curve.

    Arguments:
        Load (pd.Series): 1D pandas Series with timed index, or a 2D DataFrame
        stacked (bool): For Dataframes only (2D). If True the columns are drawn
            as a stacked area, otherwise as one line per column.
        x_norm (bool): Normalize x axis (0,1)
        y_norm (bool): Normalize y axis (0,1)
        cmap (str): Colormap of area. For stacked Dataframes only (2D)
        color (str): color of line. Used for Series and unstacked Dataframes
        legend (bool): Show legend. For stacked Dataframes only (2D)
        zoom_peak (bool): Show zoomed plot of peak
        ax: Existing matplotlib axes to draw on. If None, figure 1 is used and
            new axes are created on it.
        kwargs (dict): Forwarded to pd.DataFrame.plot.area for stacked
            Dataframes and to ``Axes.plot`` for unstacked Dataframes. Not used
            for a Series.

    Returns:
        None. The load duration curve is drawn on ``ax``.

    Raises:
        ValueError: If a stacked plot is requested for negative values, or an
            unstacked plot is requested for a DataFrame without columns.
    """
    if ax is None:
        __ = plt.figure(1)
        ax_main = plt.axes()
    else:
        ax_main = ax

    ldc_frame = None  # populated only for the stacked-area case
    curves = []       # (x, y) pairs of every line drawn on the main axes
    line_kwargs = {}  # extra style applied to the lines (unstacked frames only)

    if Load.ndim >= 2:
        if stacked:
            x, y = get_LDC(Load, x_norm=x_norm, y_norm=y_norm)
            # Reconverting to Dataframe as pd.plot.area is much more robust than plt.stackplot
            ldc_frame = pd.DataFrame(y, index=x, columns=Load.columns)
            y_max = np.nanmax(np.nansum(ldc_frame, axis=1))
            if y_norm:  # We need to renormalize cause the sum of columns is more than 1.0
                ldc_frame = ldc_frame / y_max
            if (ldc_frame < 0).any(axis=None):  # TODO: Can we stack negative values?
                raise ValueError('Cannot plot stacked area with negative values. Try stacked=False instead')
            ldc_frame.plot.area(cmap=cmap, lw=0, legend=legend, ax=ax_main, **kwargs)
        else:
            line_kwargs = kwargs
            for __, column in Load.items():
                cx, cy = get_LDC(column, x_norm=x_norm, y_norm=y_norm)
                curves.append((cx, cy))
            if not curves:
                raise ValueError('Cannot plot a DataFrame without columns')
            for x, y in curves:
                ax_main.plot(x, y, color=color, **line_kwargs)
            # The y-limit has to cover the tallest curve, not only the last one drawn.
            y_max = np.nanmax([np.nanmax(cy) for __, cy in curves])
    else:
        x, y = get_LDC(Load, x_norm=x_norm, y_norm=y_norm)
        curves.append((x, y))
        ax_main.plot(x, y, color=color)
        y_max = np.nanmax(y)

    # Set axes labels
    ax_x_min = np.min(x)
    if x_norm:
        ax_x_max = 1
        xlabel = 'Normalized duration'
    else:
        ax_x_max = len(y)
        xlabel = 'Duration'
    if y_norm:
        ax_y_max = 1
        ylabel = 'Normalized Power'
    else:
        ax_y_max = y_max
        ylabel = 'Power'
    ax_main.set_xlim(ax_x_min, ax_x_max)
    ax_main.set_xlabel(xlabel)
    ax_main.set_ylim(0, ax_y_max * 1.01)  # 1% head-room above the peak
    ax_main.set_ylabel(ylabel)

    # Draw inset plot
    if zoom_peak:
        from mpl_toolkits.axes_grid1.inset_locator import zoomed_inset_axes, mark_inset
        axins = zoomed_inset_axes(ax_main, 2.5, loc=1)
        if ldc_frame is not None:
            ldc_frame.plot.area(cmap=cmap, lw=0, legend=False, ax=axins, **kwargs)
        else:
            # Repeat every line of the main axes so the inset shows the same data.
            for cx, cy in curves:
                axins.plot(cx, cy, color=color, **line_kwargs)
        axins.set_xlim([ax_x_min, 0.15 * ax_x_max])  # TODO: Estimate x axis limits based on plotted values
        axins.set_ylim([0.8 * ax_y_max, ax_y_max])
        axins.get_xaxis().set_ticks([])
        axins.get_yaxis().set_ticks([])
        mark_inset(ax_main, axins, loc1=1, loc2=3, fc="none", ec="0.5")
def plot_rug(df_series, on_off=False, cmap='Greys', fig_title='', fig_width=14, normalized=False):
    """Create multiaxis rug plot from pandas Dataframe.

    One thin axes row is drawn per column; each timestep is a vertical tick.

    Arguments:
        df_series (pd.DataFrame): 2D pandas with timed index. A Series is
            accepted too and is converted to a one-column frame.
        on_off (bool): if True all points that are non-zero (and not NaN) will be plotted as one color. If False all values will be colored based on their value.
        cmap (str): colormap name (from colorbrewer, matplotlib etc.)
        fig_title (str): Figure title
        fig_width: Width of the created figure; the height is 0.25 per row
        normalized (bool): if True, all series colormaps will be normalized based on the maximum value of the dataframe

    Returns:
        None. The rug plot is drawn on a newly created figure.

    Raises:
        ValueError: If ``df_series`` is neither a Series nor a DataFrame, or is empty.
    """
    def format_axis(iax):
        # Formatting: remove all lines (not so elegant)
        for spine in ['top', 'right', 'left', 'bottom']:
            iax.spines[spine].set_visible(False)
        iax.yaxis.set_ticks_position('none')
        iax.get_yaxis().set_ticks([])
        iax.yaxis.set_label_coords(-.05, -.1)

    def flag_operation(v):
        # A point counts as "on" when it is neither NaN nor zero.
        return not (np.isnan(v) or v == 0)

    # check if Series or dataframe
    if isinstance(df_series, pd.DataFrame):
        rows = len(df_series.columns)
    elif isinstance(df_series, pd.Series):
        df_series = df_series.to_frame()
        rows = 1
    else:
        raise ValueError("Has to be either Series or Dataframe")
    if len(df_series) < 1:
        raise ValueError("Has to be non empty Series or Dataframe")

    # Frame-wide colour range, used as-is when normalized=True.
    frame_max = np.nanmax(df_series.values)
    frame_min = np.nanmin(df_series.values)

    __, axes = plt.subplots(nrows=rows, ncols=1, sharex=True,
                            figsize=(fig_width, 0.25 * rows), squeeze=False,
                            frameon=False, gridspec_kw={'hspace': 0.15})
    flat_axes = axes.ravel()
    time_index = df_series.index

    for (item, iseries), iax in zip(df_series.items(), flat_axes):
        format_axis(iax)
        iax.set_ylabel(str(item)[:30], rotation='horizontal',
                       rotation_mode='anchor',
                       horizontalalignment='right', x=-0.01)
        if not iseries.sum() > 0:  # nothing is drawn for a series whose sum is not positive
            continue
        if on_off:
            i_on_off = iseries.apply(flag_operation).replace(False, np.nan)
            i_on_off.plot(ax=iax, style='|', lw=.7, cmap=cmap)
            continue

        # Define (truncated) colormap. Per-row locals keep the frame-wide
        # range untouched from one row to the next.
        if normalized:
            vmin, vmax = frame_min, frame_max
        else:  # colour range of this series only
            vmin = np.nanmin(iseries.values)
            vmax = np.nanmax(iseries.values)
        # Hack to plot max color when all series are equal
        if np.isclose(vmin, vmax):
            vmin = vmin * 0.99
        iax.scatter(time_index, np.ones(len(iseries)),
                    marker='|', s=100,
                    c=iseries.values,
                    vmin=vmin,
                    vmax=vmax,
                    cmap=cmap)

    flat_axes[0].set_title(fig_title)
    # Only the last row keeps its bottom spine, acting as the shared time axis.
    flat_axes[-1].spines['bottom'].set_visible(True)
    flat_axes[-1].set_xlim(np.min(time_index), np.max(time_index))
def plot_line_holidays():
    """Placeholder for a holiday-marking plot; not implemented yet.

    Planned behaviour, per the original notes: shade a vertical span for each
    day that is a holiday. It should work only with daily data.
    """
    return None
def describe_load(Load):
    """Summary plot describing the most important features of the passed timeseries.

    Not implemented yet: the argument is ignored and nothing is drawn.
    """
    return None