import pandas as pd
import numpy as np
from scipy.stats import norm
from eemeter.location import _load_zipcode_to_lat_lng_index
from eemeter.location import _load_zipcode_to_station_index
from collections import OrderedDict
from collections import defaultdict
from itertools import chain
from warnings import warn
import json
from functools import reduce
from pkg_resources import resource_stream
from thermostat import get_version
# Columns of the heating-season output dataframe that hold real- or
# integer-valued metrics, i.e. the columns for which summary statistics
# (mean, sem, deciles) are computed in get_filtered_stats().
REAL_OR_INTEGER_VALUED_COLUMNS_HEATING = [
    'n_days_in_inputfile_date_range',
    'n_days_both_heating_and_cooling',
    'n_days_insufficient_data',
    'n_core_heating_days',
    'baseline_percentile_core_heating_comfort_temperature',
    'regional_average_baseline_heating_comfort_temperature',
    'percent_savings_baseline_percentile',
    'avoided_daily_mean_core_day_runtime_baseline_percentile',
    'avoided_total_core_day_runtime_baseline_percentile',
    'baseline_daily_mean_core_day_runtime_baseline_percentile',
    'baseline_total_core_day_runtime_baseline_percentile',
    '_daily_mean_core_day_demand_baseline_baseline_percentile',
    'percent_savings_baseline_regional',
    'avoided_daily_mean_core_day_runtime_baseline_regional',
    'avoided_total_core_day_runtime_baseline_regional',
    'baseline_daily_mean_core_day_runtime_baseline_regional',
    'baseline_total_core_day_runtime_baseline_regional',
    '_daily_mean_core_day_demand_baseline_baseline_regional',
    'mean_demand',
    'alpha',
    'tau',
    'mean_sq_err',
    'root_mean_sq_err',
    'cv_root_mean_sq_err',
    'mean_abs_err',
    'mean_abs_pct_err',
    'total_core_heating_runtime',
    'total_auxiliary_heating_core_day_runtime',
    'total_emergency_heating_core_day_runtime',
    'daily_mean_core_heating_runtime',
    # Resistance heat utilization (RHU) metrics, by 5F outdoor temperature bin.
    'rhu_00F_to_05F',
    'rhu_05F_to_10F',
    'rhu_10F_to_15F',
    'rhu_15F_to_20F',
    'rhu_20F_to_25F',
    'rhu_25F_to_30F',
    'rhu_30F_to_35F',
    'rhu_35F_to_40F',
    'rhu_40F_to_45F',
    'rhu_45F_to_50F',
    'rhu_50F_to_55F',
    'rhu_55F_to_60F',
]
# Columns of the cooling-season output dataframe that hold real- or
# integer-valued metrics, i.e. the columns for which summary statistics
# (mean, sem, deciles) are computed in get_filtered_stats().
REAL_OR_INTEGER_VALUED_COLUMNS_COOLING = [
    'n_days_in_inputfile_date_range',
    'n_days_both_heating_and_cooling',
    'n_days_insufficient_data',
    'n_core_cooling_days',
    'baseline_percentile_core_cooling_comfort_temperature',
    'regional_average_baseline_cooling_comfort_temperature',
    'percent_savings_baseline_percentile',
    'avoided_daily_mean_core_day_runtime_baseline_percentile',
    'avoided_total_core_day_runtime_baseline_percentile',
    'baseline_daily_mean_core_day_runtime_baseline_percentile',
    'baseline_total_core_day_runtime_baseline_percentile',
    '_daily_mean_core_day_demand_baseline_baseline_percentile',
    'percent_savings_baseline_regional',
    'avoided_daily_mean_core_day_runtime_baseline_regional',
    'avoided_total_core_day_runtime_baseline_regional',
    'baseline_daily_mean_core_day_runtime_baseline_regional',
    'baseline_total_core_day_runtime_baseline_regional',
    '_daily_mean_core_day_demand_baseline_baseline_regional',
    'mean_demand',
    'alpha',
    'tau',
    'mean_sq_err',
    'root_mean_sq_err',
    'cv_root_mean_sq_err',
    'mean_abs_err',
    'mean_abs_pct_err',
    'total_core_cooling_runtime',
    'daily_mean_core_cooling_runtime',
]
# Union of the heating and cooling metric columns above; used by
# summary_statistics_to_csv() to lay out the full output column set.
REAL_OR_INTEGER_VALUED_COLUMNS_ALL = [
    'n_days_in_inputfile_date_range',
    'n_days_both_heating_and_cooling',
    'n_days_insufficient_data',
    'n_core_cooling_days',
    'n_core_heating_days',
    'baseline_percentile_core_cooling_comfort_temperature',
    'baseline_percentile_core_heating_comfort_temperature',
    'regional_average_baseline_cooling_comfort_temperature',
    'regional_average_baseline_heating_comfort_temperature',
    'percent_savings_baseline_percentile',
    'avoided_daily_mean_core_day_runtime_baseline_percentile',
    'avoided_total_core_day_runtime_baseline_percentile',
    'baseline_daily_mean_core_day_runtime_baseline_percentile',
    'baseline_total_core_day_runtime_baseline_percentile',
    '_daily_mean_core_day_demand_baseline_baseline_percentile',
    'percent_savings_baseline_regional',
    'avoided_daily_mean_core_day_runtime_baseline_regional',
    'avoided_total_core_day_runtime_baseline_regional',
    'baseline_daily_mean_core_day_runtime_baseline_regional',
    'baseline_total_core_day_runtime_baseline_regional',
    '_daily_mean_core_day_demand_baseline_baseline_regional',
    'mean_demand',
    'alpha',
    'tau',
    'mean_sq_err',
    'root_mean_sq_err',
    'cv_root_mean_sq_err',
    'mean_abs_err',
    'mean_abs_pct_err',
    'total_core_cooling_runtime',
    'total_core_heating_runtime',
    'total_auxiliary_heating_core_day_runtime',
    'total_emergency_heating_core_day_runtime',
    'daily_mean_core_cooling_runtime',
    'daily_mean_core_heating_runtime',
    # Resistance heat utilization (RHU) metrics, by 5F outdoor temperature bin.
    'rhu_00F_to_05F',
    'rhu_05F_to_10F',
    'rhu_10F_to_15F',
    'rhu_15F_to_20F',
    'rhu_20F_to_25F',
    'rhu_25F_to_30F',
    'rhu_30F_to_35F',
    'rhu_35F_to_40F',
    'rhu_40F_to_45F',
    'rhu_45F_to_50F',
    'rhu_50F_to_55F',
    'rhu_55F_to_60F',
]
def combine_output_dataframes(dfs):
    """ Combines output dataframes. Useful when combining output from batches.

    Parameters
    ----------
    dfs : list of pd.DataFrame
        Output dataframes to combine into one.

    Returns
    -------
    out : pd.DataFrame
        Dataframe with combined output metadata.
    """
    # ignore_index=True renumbers rows 0..n-1 so per-batch indices don't collide.
    return pd.concat(dfs, ignore_index=True)
def get_filtered_stats(
        df, row_filter, label, heating_or_cooling, target_columns,
        target_baseline_method):
    """ Apply a row filter to a metrics dataframe and compute summary
    statistics (count, mean, sem, 95% confidence bounds, and deciles) for
    each target column of the filtered result.

    Parameters
    ----------
    df : pd.DataFrame
        Metrics for a set of thermostat core day sets.
    row_filter : callable
        Function of ``(row, df)`` returning True for rows to keep.
    label : str
        Label prefix for this set of statistics.
    heating_or_cooling : {"heating", "cooling"}
        Season type; appended to ``label`` to form the output label.
    target_columns : list of str
        Names of the real- or integer-valued columns to summarize.
    target_baseline_method : str
        Unused here; retained for interface compatibility with callers.

    Returns
    -------
    stats : list of collections.OrderedDict
        Single-element list containing the statistics, or an empty list
        (with a warning) when ``df`` has no rows at all.
    """
    n_rows_total = df.shape[0]
    # Filter decisions are made per-row; keep only rows the filter accepts.
    filtered_df = df[[row_filter(row, df) for i, row in df.iterrows()]]
    n_rows_kept = filtered_df.shape[0]
    n_rows_discarded = n_rows_total - n_rows_kept
    stats = OrderedDict()
    stats["label"] = "{}_{}".format(label, heating_or_cooling)
    stats["sw_version"] = get_version()
    stats["n_thermostat_core_day_sets_total"] = n_rows_total
    stats["n_thermostat_core_day_sets_kept"] = n_rows_kept
    stats["n_thermostat_core_day_sets_discarded"] = n_rows_discarded
    if n_rows_total > 0:
        for column_name in target_columns:
            # Drop NaN and +/-inf so they don't poison the statistics.
            column = filtered_df[column_name].replace(
                [np.inf, -np.inf], np.nan).dropna()
            # calculate quantiles and statistics
            mean = np.nanmean(column)
            sem = np.nanstd(column) / (column.count() ** .5)
            # 95% confidence interval around the mean (normal approximation).
            lower_bound = mean - (1.96 * sem)
            upper_bound = mean + (1.96 * sem)
            stats["{}_n".format(column_name)] = column.count()
            stats["{}_upper_bound_95_perc_conf".format(column_name)] = upper_bound
            stats["{}_mean".format(column_name)] = mean
            stats["{}_lower_bound_95_perc_conf".format(column_name)] = lower_bound
            stats["{}_sem".format(column_name)] = sem
            for quantile in [10, 20, 30, 40, 50, 60, 70, 80, 90]:
                stats["{}_q{}".format(column_name, quantile)] = \
                    column.quantile(quantile / 100.)
        return [stats]
    else:
        warn(
            "Not enough data to compute summary_statistics ({}_{})"
            .format(label, heating_or_cooling)
        )
        return []
def compute_summary_statistics(
        metrics_df,
        target_baseline_method="baseline_percentile",
        advanced_filtering=False):
    """ Computes summary statistics for the output dataframe. Computes the
    following statistics for each real-valued or integer valued column in
    the output dataframe: mean, standard error of the mean, and deciles.

    Parameters
    ----------
    metrics_df : pd.DataFrame
        Output for which to compute summary statistics.
    target_baseline_method : {"baseline_percentile", "baseline_regional"}, default "baseline_percentile"
        Baselining method by which samples will be filtered according to bad fits.
    advanced_filtering : bool, default False
        If True, also compute statistics for the intermediate filtering
        stages (tau-only and tau + CVRMSE), in addition to the unfiltered
        and fully-filtered stages.

    Returns
    -------
    stats : list of dict
        National-weighting stats followed by per-climate-zone stats. Within
        each stats dict, keys are as follows, in which ### is a placeholder
        for the name of the metric column:

        - mean: ###_mean
        - standard error of the mean: ###_sem
        - deciles: ###_q10 through ###_q90
        - number of non-null core day sets: ###_n

        The following general values are also output:

        - label: label
        - number of total core day sets: n_thermostat_core_day_sets_total

    Raises
    ------
    ValueError
        If ``target_baseline_method`` is not a supported method.
    """
    if target_baseline_method not in ["baseline_percentile", "baseline_regional"]:
        message = (
            'Baseline method not supported - please use one of'
            ' "baseline_percentile" or "baseline_regional"'
        )
        raise ValueError(message)

    def _identity_filter(row, df):
        # Keep every row.
        return True

    def _range_filter(row, column_name, heating_or_cooling,
                      lower_bound=-np.inf, upper_bound=np.inf,
                      target_baseline=False):
        # Keep rows whose value lies strictly between the bounds (NaN fails).
        if target_baseline:
            full_column_selector = "{}_{}".format(
                column_name, target_baseline_method)
        else:
            full_column_selector = column_name
        column_value = row[full_column_selector]
        return lower_bound < column_value < upper_bound

    def _percentile_range_filter(row, column_name, heating_or_cooling, df,
                                 quantile=0.0, target_baseline=False):
        # Keep rows between the `quantile` and `1 - quantile` quantiles of
        # the (non-null) column.
        if target_baseline:
            full_column_selector = "{}_{}".format(
                column_name, target_baseline_method)
        else:
            full_column_selector = column_name
        lower_bound = df[full_column_selector].dropna().quantile(0.0 + quantile)
        upper_bound = df[full_column_selector].dropna().quantile(1.0 - quantile)
        return _range_filter(row, column_name, heating_or_cooling,
                             lower_bound, upper_bound, target_baseline)

    def _tau_filter_heating(row, df):
        return _range_filter(row, "tau", "heating", 0, 25)

    def _tau_filter_cooling(row, df):
        return _range_filter(row, "tau", "cooling", 0, 25)

    def _cvrmse_filter_heating(row, df):
        return _range_filter(row, "cv_root_mean_sq_err", "heating", upper_bound=0.6)

    def _cvrmse_filter_cooling(row, df):
        return _range_filter(row, "cv_root_mean_sq_err", "cooling", upper_bound=0.6)

    def _savings_filter_p01_heating(row, df):
        return _percentile_range_filter(row, "percent_savings", "heating", df, 0.01, True)

    def _savings_filter_p01_cooling(row, df):
        return _percentile_range_filter(row, "percent_savings", "cooling", df, 0.01, True)

    def _combine_filters(filters):
        # AND together a list of row filters (short-circuits like the
        # original reduce-based implementation).
        def _new_filter(row, df):
            return all(filter_(row, df) for filter_ in filters)
        return _new_filter

    def heating_stats(df, filter_, label):
        heating_df = df[["heating" in name for name in df["heating_or_cooling"]]]
        return get_filtered_stats(
            heating_df, filter_, label,
            "heating", REAL_OR_INTEGER_VALUED_COLUMNS_HEATING,
            target_baseline_method)

    def cooling_stats(df, filter_, label):
        cooling_df = df[["cooling" in name for name in df["heating_or_cooling"]]]
        return get_filtered_stats(
            cooling_df, filter_, label,
            "cooling", REAL_OR_INTEGER_VALUED_COLUMNS_COOLING,
            target_baseline_method)

    def _climate_zone_subset(substring):
        # Rows whose (non-null) climate_zone contains `substring`.
        return metrics_df[[
            (cz is not None) and substring in cz
            for cz in metrics_df["climate_zone"]
        ]]

    # (dataframe, label prefix) for the national set and each climate zone.
    climate_zone_dfs = [
        (metrics_df, "all"),
        (_climate_zone_subset("Very-Cold/Cold"), "very-cold_cold"),
        (_climate_zone_subset("Mixed-Humid"), "mixed-humid"),
        (_climate_zone_subset("Mixed-Dry/Hot-Dry"), "mixed-dry_hot-dry"),
        (_climate_zone_subset("Hot-Humid"), "hot-humid"),
        (_climate_zone_subset("Marine"), "marine"),
    ]

    filter_0 = _identity_filter
    filter_1_heating = _combine_filters([_tau_filter_heating])
    filter_1_cooling = _combine_filters([_tau_filter_cooling])
    filter_2_heating = _combine_filters(
        [_tau_filter_heating, _cvrmse_filter_heating])
    filter_2_cooling = _combine_filters(
        [_tau_filter_cooling, _cvrmse_filter_cooling])
    filter_3_heating = _combine_filters(
        [_tau_filter_heating, _cvrmse_filter_heating, _savings_filter_p01_heating])
    filter_3_cooling = _combine_filters(
        [_tau_filter_cooling, _cvrmse_filter_cooling, _savings_filter_p01_cooling])

    # (heating filter, cooling filter, label suffix) for each filtering stage.
    if advanced_filtering:
        filter_stages = [
            (filter_0, filter_0, "no_filter"),
            (filter_1_heating, filter_1_cooling, "tau_filter"),
            (filter_2_heating, filter_2_cooling, "tau_cvrmse_filter"),
            (filter_3_heating, filter_3_cooling, "tau_cvrmse_savings_p01_filter"),
        ]
    else:
        filter_stages = [
            (filter_0, filter_0, "no_filter"),
            (filter_3_heating, filter_3_cooling, "tau_cvrmse_savings_p01_filter"),
        ]

    # One heating + one cooling stats entry per (stage, region), in stage-major
    # order. This also fixes a label bug in the hand-unrolled original, where
    # the Very-Cold/Cold cooling no-filter stats were labeled "very-cold_cold"
    # instead of "very-cold_cold_no_filter" and so were never picked up by the
    # national weighting lookup below.
    stats = list(chain.from_iterable(
        heating_stats(df, heating_filter, "{}_{}".format(region_label, stage_label))
        + cooling_stats(df, cooling_filter, "{}_{}".format(region_label, stage_label))
        for heating_filter, cooling_filter, stage_label in filter_stages
        for df, region_label in climate_zone_dfs
    ))

    stats_dict = {stat["label"]: stat for stat in stats}

    def _load_climate_zone_weights(filename_or_buffer):
        # Map the CSV's climate zone names onto the label keys used above.
        climate_zone_keys = {
            "Very-Cold/Cold": "very-cold_cold",
            "Mixed-Humid": "mixed-humid",
            "Mixed-Dry/Hot-Dry": "mixed-dry_hot-dry",
            "Hot-Humid": "hot-humid",
            "Marine": "marine",
        }
        df = pd.read_csv(
            filename_or_buffer,
            usecols=["climate_zone", "heating_weight", "cooling_weight"],
        ).set_index("climate_zone")
        # Series.items() replaces Series.iteritems(), removed in pandas 2.0.
        heating_weights = {
            climate_zone_keys[cz]: weight
            for cz, weight in df["heating_weight"].items()
        }
        cooling_weights = {
            climate_zone_keys[cz]: weight
            for cz, weight in df["cooling_weight"].items()
        }
        return heating_weights, cooling_weights

    with resource_stream('thermostat.resources',
                         'NationalAverageClimateZoneWeightings.csv') as f:
        heating_weights, cooling_weights = _load_climate_zone_weights(f)

    def _compute_national_weightings(stats_by_climate_zone, keys, weights):
        # Weighted average of each per-climate-zone statistic, renormalized
        # over only the climate zones for which a value is available.
        def _national_weight(key):
            results = []
            for cz, weight in weights.items():
                stat_cz = stats_by_climate_zone.get(cz)
                value = None if stat_cz is None else stat_cz.get(key)
                if pd.notnull(weight) and pd.notnull(value):
                    results.append((weight, value))
            if len(results) == 0:
                return None
            weighted_sum = sum(weight * value for weight, value in results)
            sum_of_weights = sum(weight for weight, _ in results)
            return weighted_sum / sum_of_weights

        stat_names = [
            "mean",
            "q10", "q20", "q30", "q40", "q50", "q60", "q70", "q80", "q90",
        ]
        key_stats = [
            "{}_{}".format(key, stat_name)
            for key in keys for stat_name in stat_names
        ]
        return {
            "{}_national_weighted_mean".format(key_stat): _national_weight(key_stat)
            for key_stat in key_stats
        }

    def _compute_national_weighting_lower_and_upper_bounds(
            stats_by_climate_zone, keys, weights):
        # 95% confidence bounds on the national weighted mean, propagating
        # each climate zone's standard error of the mean.
        def _compute_bounds(key):
            means, sems, weights_ = [], [], []
            for cz, weight in weights.items():
                stat_cz = stats_by_climate_zone.get(cz)
                if stat_cz is None:
                    mean, sem = None, None
                else:
                    mean = stat_cz.get("{}_mean".format(key), None)
                    sem = stat_cz.get("{}_sem".format(key), None)
                if pd.notnull(weight) and pd.notnull(mean) and pd.notnull(sem):
                    weights_.append(weight)
                    means.append(mean)
                    sems.append(sem)
            if len(weights_) == 0:
                return {}
            weighted_sum = sum(
                weight * mean for weight, mean in zip(weights_, means))
            weighted_mean = weighted_sum / sum(weights_)  # renormalize
            # SEMs combine in quadrature. NOTE(review): the combined sem is
            # not renormalized by the weight sum — this assumes the weights
            # sum to ~1; preserved from the original implementation.
            weighted_sem = sum(
                (weight * sem) ** 2 for weight, sem in zip(weights_, sems)
            ) ** 0.5
            lower_bound = weighted_mean - (1.96 * weighted_sem)
            upper_bound = weighted_mean + (1.96 * weighted_sem)
            return {
                "{}_lower_bound_95_perc_conf_national_weighted_mean".format(key): lower_bound,
                "{}_upper_bound_95_perc_conf_national_weighted_mean".format(key): upper_bound,
            }

        items = {}
        for key in keys:
            items.update(_compute_bounds(key))
        return items

    national_weighting_stats = []
    filter_names = [stage_label for _, _, stage_label in filter_stages]
    climate_zones = [
        "mixed-humid",
        "mixed-dry_hot-dry",
        "marine",
        "hot-humid",
        "very-cold_cold",
    ]
    methods = [
        "baseline_percentile",
        "baseline_regional",
    ]
    keys = ["percent_savings_{}".format(method) for method in methods]
    for season_type in ["heating", "cooling"]:
        weights = heating_weights if season_type == "heating" else cooling_weights
        for filter_name in filter_names:
            stats_by_climate_zone = {
                cz: stats_dict.get("{}_{}_{}".format(cz, filter_name, season_type))
                for cz in climate_zones
            }
            national_weightings = _compute_national_weightings(
                stats_by_climate_zone, keys, weights)
            bounds = _compute_national_weighting_lower_and_upper_bounds(
                stats_by_climate_zone, keys, weights)
            national_weightings.update(bounds)
            national_weightings["label"] = "national_weighted_mean_{}_{}".format(
                season_type, filter_name)
            national_weighting_stats.append(national_weightings)

    # National weightings are listed first in the combined output.
    return national_weighting_stats + stats
def summary_statistics_to_csv(stats, filepath, product_id):
    """ Write metric statistics to CSV file.

    Parameters
    ----------
    stats : list of dict
        List of outputs from thermostat.stats.compute_summary_statistics().
        Each dict is mutated in place to carry ``product_id``.
    filepath : str
        Filepath at which to save the summary statistics.
    product_id : str
        A combination of the connected thermostat service plus one or more
        connected thermostat device models that comprises the data set.

    Returns
    -------
    df : pandas.DataFrame
        A pandas dataframe containing the output data (labels as columns).
    """
    columns = [
        "label",
        "product_id",
        "sw_version",
    ]
    methods = [
        "baseline_percentile",
        "baseline_regional",
    ]
    # For each baselining method: mean, the nine deciles, then the 95%
    # confidence bounds, each suffixed with "_national_weighted_mean".
    stat_suffixes = (
        ["mean"]
        + ["q{}".format(quantile) for quantile in range(10, 100, 10)]
        + ["lower_bound_95_perc_conf", "upper_bound_95_perc_conf"]
    )
    national_weighting_columns = [
        "percent_savings_{}_{}_national_weighted_mean".format(method, suffix)
        for method in methods
        for suffix in stat_suffixes
    ]
    columns.extend(national_weighting_columns)
    columns.extend([
        "n_thermostat_core_day_sets_total",
        "n_thermostat_core_day_sets_kept",
        "n_thermostat_core_day_sets_discarded",
    ])
    # Per-metric statistics, in the same key order get_filtered_stats emits.
    for column_name in REAL_OR_INTEGER_VALUED_COLUMNS_ALL:
        columns.append("{}_n".format(column_name))
        columns.append("{}_upper_bound_95_perc_conf".format(column_name))
        columns.append("{}_mean".format(column_name))
        columns.append("{}_lower_bound_95_perc_conf".format(column_name))
        columns.append("{}_sem".format(column_name))
        for quantile in range(10, 100, 10):
            columns.append("{}_q{}".format(column_name, quantile))
    # add product_id
    for row in stats:
        row["product_id"] = product_id
    # transpose for readability.
    stats_dataframe = pd.DataFrame(stats, columns=columns) \
        .set_index('label').transpose()
    stats_dataframe.to_csv(filepath)
    return stats_dataframe