Source code for thermostat.importers

from thermostat.core import Thermostat

import pandas as pd
import numpy as np
from eemeter.weather.location import zipcode_to_usaf_station
from eemeter.weather import ISDWeatherSource

import warnings
from datetime import datetime
from datetime import timedelta
import dateutil.parser
import os
import pytz

def from_csv(metadata_filename, verbose=False):
    """ Creates Thermostat objects from data stored in CSV files.

    Thermostats are imported lazily: this function is a generator, so no
    interval data is read until the result is iterated. Rows which cannot be
    imported are skipped with a warning rather than aborting the import.

    Parameters
    ----------
    metadata_filename : str
        Path to a file containing the thermostat metadata. The
        `interval_data_filename` column of this file is resolved relative to
        the directory containing the metadata file.
    verbose : boolean
        Set to True to output a more detailed log of import activity.

    Returns
    -------
    thermostats : iterator over thermostat.Thermostat objects
        Thermostats imported from the given CSV input files.
    """
    metadata = pd.read_csv(
        metadata_filename,
        dtype={
            "thermostat_id": str,
            "zipcode": str,
            "utc_offset": str,
            "equipment_type": int,
            "interval_data_filename": str
        }
    )

    # Interval data files live alongside the metadata file.
    base_directory = os.path.dirname(metadata_filename)

    for _, row in metadata.iterrows():
        if verbose:
            print("Importing thermostat {}".format(row.thermostat_id))

        # make sure this thermostat type is supported.
        if row.equipment_type not in [1, 2, 3, 4, 5]:
            warnings.warn("Skipping import of thermostat controlling equipment"
                          " of unsupported type. (id={})".format(row.thermostat_id))
            continue

        interval_data_filename = os.path.join(base_directory,
                                              row.interval_data_filename)

        try:
            thermostat = get_single_thermostat(
                row.thermostat_id,
                row.zipcode,
                row.equipment_type,
                row.utc_offset,
                interval_data_filename
            )
        except ValueError as error:
            # A ValueError does not necessarily mean that no weather station
            # could be located (it is also raised for malformed interval
            # data), so report the underlying error instead of asserting a
            # single cause. Warn and skip.
            warnings.warn(
                "Skipping import of thermostat (id={}) because its data "
                "could not be loaded ({}). If a sufficient source of outdoor "
                "weather data could not be located using the given ZIP code "
                "({}), this is likely due to the discrepancy between US "
                "Postal Service ZIP codes (which do not always map well to "
                "locations) and Census Bureau ZCTAs (which usually do). "
                "Please supply a zipcode which corresponds to a US Census "
                "Bureau ZCTA."
                .format(row.thermostat_id, error, row.zipcode))
            continue

        yield thermostat
def get_single_thermostat(thermostat_id, zipcode, equipment_type,
                          utc_offset, interval_data_filename):
    """ Load a single thermostat directly from an interval data file.

    Parameters
    ----------
    thermostat_id : str
        A unique identifier for the thermostat.
    zipcode : str
        The zipcode of the thermostat, e.g. `"01234"`.
    equipment_type : int
        The equipment type of the thermostat (one of the integer codes
        understood by `_get_equipment_type`).
    utc_offset : str
        A string representing the UTC offset of the interval data, e.g.
        `"-0700"`. Could also be `"Z"` (UTC), or just `"+7"` (equivalent to
        `"+0700"`), or any other timezone format recognized by the library
        method dateutil.parser.parse.
    interval_data_filename : str
        The path to the CSV in which the interval data is stored.

    Returns
    -------
    thermostat : thermostat.Thermostat
        The loaded thermostat object.

    Raises
    ------
    ValueError
        If the dates in the interval data are not consecutive days starting
        from the first row, or if no weather station could be located for
        the given zipcode.
    """
    df = pd.read_csv(interval_data_filename)

    heating, cooling, aux_emerg = _get_equipment_type(equipment_type)

    # load indices
    #
    # pd.date_range is used because the range-generating constructor form
    # pd.DatetimeIndex(start=..., periods=..., freq=...) has been removed
    # from pandas. The hourly frequency is given as a Timedelta rather than
    # the "H" alias, which is deprecated in recent pandas releases.
    dates = pd.to_datetime(df["date"])
    n_days = dates.shape[0]
    n_hours = n_days * 24
    one_hour = pd.Timedelta(hours=1)
    daily_index = pd.date_range(start=dates[0], periods=n_days, freq="D")
    hourly_index = pd.date_range(start=dates[0], periods=n_hours,
                                 freq=one_hour)
    hourly_index_utc = pd.date_range(start=dates[0], periods=n_hours,
                                     freq=one_hour, tz=pytz.UTC)

    # raise an error if dates are not aligned
    if not (dates == daily_index).all():
        message = "Dates provided for thermostat_id={} may contain some " \
                  "which are out of order, missing, or duplicated." \
                  .format(thermostat_id)
        raise ValueError(message)

    # load hourly time series values
    temp_in = pd.Series(_get_hourly_block(df, "temp_in"), hourly_index)

    if heating:
        heating_setpoint = pd.Series(
            _get_hourly_block(df, "heating_setpoint"), hourly_index)
    else:
        heating_setpoint = None

    if cooling:
        cooling_setpoint = pd.Series(
            _get_hourly_block(df, "cooling_setpoint"), hourly_index)
    else:
        cooling_setpoint = None

    if aux_emerg:
        auxiliary_heat_runtime = pd.Series(
            _get_hourly_block(df, "auxiliary_heat_runtime"), hourly_index)
        emergency_heat_runtime = pd.Series(
            _get_hourly_block(df, "emergency_heat_runtime"), hourly_index)
    else:
        auxiliary_heat_runtime = None
        emergency_heat_runtime = None

    # load outdoor temperatures
    station = zipcode_to_usaf_station(zipcode)

    if station is None:
        message = "Could not locate a valid source of outdoor temperature " \
                  "data for ZIP code {}".format(zipcode)
        raise ValueError(message)

    ws_hourly = ISDWeatherSource(station)

    # Parse the offset by attaching it to an arbitrary timestamp; only the
    # resulting tzinfo is of interest.
    offset = dateutil.parser.parse(
        "2000-01-01T00:00:00" + utc_offset).tzinfo.utcoffset(None)

    # The interval data is in local time, so subtracting the offset gives
    # the UTC timestamps at which to look up outdoor temperatures. The
    # result is then re-labelled with the local (naive) hourly index so that
    # it lines up with the other hourly series.
    temp_out = ws_hourly.indexed_temperatures(hourly_index_utc - offset,
                                              "degF")
    temp_out.index = hourly_index

    # load daily time series values
    if cooling:
        cool_runtime = pd.Series(df["cool_runtime"].values, daily_index)
    else:
        cool_runtime = None

    if heating:
        heat_runtime = pd.Series(df["heat_runtime"].values, daily_index)
    else:
        heat_runtime = None

    # create thermostat instance
    thermostat = Thermostat(
        thermostat_id,
        equipment_type,
        zipcode,
        station,
        temp_in,
        temp_out,
        cooling_setpoint,
        heating_setpoint,
        cool_runtime,
        heat_runtime,
        auxiliary_heat_runtime,
        emergency_heat_runtime
    )
    return thermostat
def _get_hourly_block(df, prefix):
    """ Flatten the 24 hourly columns for `prefix` into a single 1-d array.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the columns `<prefix>_00` through `<prefix>_23`.
    prefix : str
        Common prefix of the hourly columns to extract.

    Returns
    -------
    values : numpy.ndarray
        One-dimensional array in row-major order, i.e. the 24 hourly values
        of the first row, followed by those of the second row, and so on.
    """
    hourly_columns = []
    for hour in range(24):
        hourly_columns.append("{}_{:02d}".format(prefix, hour))

    block = df[hourly_columns].values
    return block.reshape(-1)


def _get_equipment_type(equipment_type):
    """ Look up which kinds of equipment an equipment type code implies.

    Parameters
    ----------
    equipment_type : int
        Equipment type code.

    Returns
    -------
    heating : boolean
        True if the equipment type has heating equipment
    cooling : boolean
        True if the equipment type has cooling equipment
    aux_emerg : boolean
        True if the equipment type has auxiliary/emergency heat equipment

    For any code other than 1 through 5, a single `None` is returned in
    place of the three booleans.
    """
    # (code, (heating, cooling, aux_emerg))
    capabilities = (
        (1, (True, True, True)),
        (2, (True, True, False)),
        (3, (True, True, False)),
        (4, (True, False, False)),
        (5, (False, True, False)),
    )

    for code, flags in capabilities:
        if equipment_type == code:
            return flags

    return None