Skip to content

Instantly share code, notes, and snippets.

@bt-
Created March 17, 2025 17:48
Show Gist options
  • Save bt-/38320fb30832d1ec069a6c7b19fce6f1 to your computer and use it in GitHub Desktop.
Save bt-/38320fb30832d1ec069a6c7b19fce6f1 to your computer and use it in GitHub Desktop.
Export data from a pvcaptest CapData object to a PVsyst standard-format CSV file.
def to_pvsyst_csv(
    path,
    cd_obj,
    start_date,
    loc,
    fname_prefix,
    end_date=None,
    ghi_col_name=None,
    dhi_col_name=None,
    poa=False,
    export=True,
    filtered=True,
    return_ix=True,
):
    """
    Given a CapData object export a PVsyst csv format weather file.

    Function does not resample! PVsyst requires hourly intervals
    for simulation. To successfully import the output of this
    function into PVsyst you must pass a CapData object where the
    index of the dataframe attribute has an hourly frequency.

    Output file has GHI, ambient temperature, and wind speed data
    and expects to find this data in the CapData object identified
    by the translation dictionary and the regression translation
    dictionary.

    Adds units assuming that the GHI data is W/m2, the temperature
    is in Celsius, and the wind speed is in m/sec.

    Any missing data is filled with zeros.

    Parameters
    ----------
    path : str
        Path to export csv file. The file name is appended to this string
        by plain concatenation, so a directory must include its trailing
        path separator.
    cd_obj : CapData
        captest CapData object.
    start_date : str
        Start date of data to be exported. Used to slice the data and as the
        start of the hourly index the data is reindexed to.
    loc : dict
        Location dictionary. Same as that used in the Captest csky function, but with
        the city and state added as strings. Below is an example loc dictionary.
        loc = {'latitude': 30.275128,
               'longitude': -97.740350,
               'altitude': 300,
               'tz': 'America/Chicago',
               'city': 'Austin',
               'state': 'TX'}
    fname_prefix : str
        Prefix for the filename of the exported file. Also used in header data of
        exported file as the site name.
    end_date : str, default None
        End date of data to be exported. By default uses all data from the passed
        start date to the end of the data available.
    ghi_col_name : str, default None
        A column name used to select the column with GHI data for inclusion in the
        exported file. By default an attempt is made to identify the GHI column
        from the translation dictionary.
    dhi_col_name : str, default None
        A column name used to select the column with DHI data for inclusion in the
        exported file. A warning is issued if the column is not found.
    poa : bool, default False
        Set to true to include POA irradiance in the data exported.
    export : bool, default True
        If true exports the data, if false will return a tuple. The first value
        is a pvsyst format dataframe and the second value is the header data as
        a string.
    filtered : bool, default True
        By default exports data from the data_filtered attribute. Set to False to use
        the data in the data attribute of the passed CapData object.
    return_ix : bool, default True
        By default returns the index of the exported dataframe. Set to False to
        return None after exporting. Ignored when `export` is False.

    Returns
    -------
    pandas.Index, tuple, or None
        When `export` is True, the index of the exported dataframe (or None if
        `return_ix` is False). When `export` is False, a tuple of the PVsyst
        format dataframe and the header string. None is also returned, after
        printing a message, when `ghi_col_name` is not passed and exactly one
        GHI translation key cannot be identified.
    """
    source = cd_obj.data_filtered if filtered else cd_obj.data
    df = source.copy()

    # Select portion of data to output and fill missing timestamps with zeros.
    if end_date is None:
        end_date = df.index[-1]
    df = df.loc[start_date:end_date, :]
    complete_index = pd.date_range(start=start_date, end=end_date, freq="60min")
    df = df.reindex(index=complete_index, fill_value=0)

    # Date and time columns come first; the frame shares the hourly index so
    # that the measurement columns added below align on timestamps.
    df_pvsyst_format = pd.DataFrame(
        {
            "Year": df.index.year.astype("int"),
            "Month": df.index.month.astype("int"),
            "Day": df.index.day.astype("int"),
            "Hour": df.index.hour.astype("int"),
        },
        index=df.index,
    )

    # Try to find a GHI column when the caller did not name one.
    if ghi_col_name is None:
        ghi_trans_keys = [key for key in cd_obj.trans_keys if "ghi" in key]
        if not ghi_trans_keys:
            print("There are no GHI columns identified in the translation dictionary.")
            return None
        if len(ghi_trans_keys) > 1:
            print(
                "There is more than one ghi translation key. Pass one to ghi_col_name:"
            )
            for ghi_key in ghi_trans_keys:
                print(ghi_key)
            return None
        # NOTE(review): the value stored under a column group key may be a
        # list of column names rather than a single name - confirm that the
        # GHI group holds exactly one column.
        ghi_col_name = cd_obj.column_groups[ghi_trans_keys[0]]

    # Add GHI, optional DHI/POA, ambient temp, and wind speed to the output.
    df_pvsyst_format["GHI"] = df[ghi_col_name]
    if dhi_col_name is not None:
        try:
            df_pvsyst_format["DHI"] = df[dhi_col_name]
        except KeyError:
            warnings.warn("DHI column not found in data.")
    # NOTE(review): get_reg_cols is called with only the regression key, so
    # whether the values it returns follow the `filtered` argument is not
    # visible here - confirm against the CapData implementation.
    if poa:
        df_pvsyst_format["GPI"] = cd_obj.get_reg_cols("poa")
    df_pvsyst_format["Tamb"] = cd_obj.get_reg_cols("t_amb")
    df_pvsyst_format["WindVel"] = cd_obj.get_reg_cols("w_vel")

    # Units row for each possible output column; the key order is also the
    # column order of the exported file.
    units = {
        "Year": "",
        "Month": "",
        "Day": "",
        "Hour": "",
        "GHI": "W/m2",
        "DHI": "W/m2",
        "GPI": "W/m2",
        "Tamb": "deg.C",
        "WindVel": "m/sec",
    }
    output_columns = list(units)

    # Attach the units as a second column level, then order the columns that
    # are present to match output_columns.
    df_pvsyst_format.columns = pd.MultiIndex.from_arrays(
        [
            df_pvsyst_format.columns,
            [units[col] for col in df_pvsyst_format.columns],
        ]
    )
    df_pvsyst_format = df_pvsyst_format.reindex(columns=output_columns, level=0)

    # Replace missing data (NaNs) with zeros.
    df_pvsyst_format = df_pvsyst_format.fillna(value=0)

    header_str = build_pvsyst_header_str(
        df_pvsyst_format, fname_prefix, loc, country="USA"
    )

    if not export:
        return df_pvsyst_format, header_str

    # Filename of exported file: <prefix>_<first day>to<last day>.
    pvsyst_export_filename = "{}_{}to{}".format(
        fname_prefix,
        df_pvsyst_format.index[0].strftime("%Y%m%d"),
        df_pvsyst_format.index[-1].strftime("%Y%m%d"),
    )
    export_path = path + pvsyst_export_filename + ".csv"

    # Write the data, then prepend the header rows to the exported file.
    df_pvsyst_format.to_csv(export_path, index=False)
    line_prepender(export_path, header_str)
    print("Exported file is called: {}.csv".format(pvsyst_export_filename))

    return df_pvsyst_format.index if return_ix else None
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment