Last active
April 23, 2021 15:41
-
-
Save gwangjinkim/8932df4fde3338740da022b76f1d84ca to your computer and use it in GitHub Desktop.
print list of dataframes into excel sheets
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#################################################
# Solution mainly using pandas functionality.
# Reading in returns an ordered dict (with the sheet names as keys) -> close to R functionality.
# Appending to Excel sheet by sheet like this might be slower.
#################################################
def xlsx2dfs(fpath, **kwargs):
    """
    Read all sheets of the Excel file [fpath] in a single call.

    Parameters:
    fpath    : Input Excel file path
    **kwargs : other arguments passed on to pd.read_excel()
    Returns: dict mapping each sheet name to its DataFrame
    """
    # pandas has no `from_excel`; the reader function is `read_excel`.
    # sheet_name=None requests every sheet instead of only the first one.
    return pd.read_excel(fpath, sheet_name=None, **kwargs)
def append_to_excel(df, fpath, sheet_name, header=True, index=True, **kwargs):
    """
    Append the DataFrame [df] as sheet [sheet_name] to the existing
    Excel file [fpath].

    Parameters:
    df            : DataFrame to write
    fpath         : Path of the Excel file to append to
    sheet_name    : Name of the sheet that receives the data
    header, index : passed on to DataFrame.to_excel()
    **kwargs      : other arguments passed on to DataFrame.to_excel()
    Returns: None
    """
    # Request openpyxl explicitly: append mode is not supported by the
    # xlsxwriter engine, which pandas may otherwise pick as the default.
    with pd.ExcelWriter(fpath, mode="a", engine="openpyxl") as writer:
        # header/index were accepted but never forwarded before.
        df.to_excel(writer, sheet_name=sheet_name, header=header,
                    index=index, **kwargs)
def dfs2xlsx(dfs_dct, fpath, overwrite=True, header=True, index=True, **kwargs):
    """
    Write a dict of DataFrames [dfs_dct] to the Excel file [fpath],
    one sheet per dict entry (the key is used as sheet name).

    Parameters:
    dfs_dct       : dict mapping sheet name to DataFrame
    fpath         : Output file path
    overwrite     : if True, an existing file is removed first;
                    if False, the sheets are appended to an existing file
    header, index : passed on to DataFrame.to_excel()
    **kwargs      : other arguments passed on to DataFrame.to_excel();
                    an 'engine' entry is used for the Excel writer instead
    Returns: None
    """
    # Local import keeps this function self-contained.
    import os

    file_exists = os.path.exists(fpath)
    if overwrite and file_exists:
        os.remove(fpath)
        file_exists = False

    # Nothing to write: stop here so no empty workbook is created.
    if not dfs_dct:
        return

    # The engine configures the writer, not an individual sheet.
    engine = kwargs.pop("engine", None)
    if file_exists:
        mode = "a"
        # Append mode is not supported by xlsxwriter, so default to openpyxl.
        if engine is None:
            engine = "openpyxl"
    else:
        mode = "w"

    # One writer for all sheets instead of re-opening the workbook per sheet.
    with pd.ExcelWriter(fpath, mode=mode, engine=engine) as writer:
        for sheet_name, df in dfs_dct.items():
            df.to_excel(writer, sheet_name=sheet_name, header=header,
                        index=index, **kwargs)
import pandas as pd | |
def dfs2xlsx(dfs, sheet_names=None, outfpath="output.xlsx", **kwargs):
    """
    Write a list of DataFrames [dfs] to an Excel file [outfpath]
    with Sheet names given in [sheet_names].

    Parameters:
    dfs         : DataFrames list
    sheet_names : Names of sheets for each DataFrame.
                  Defaults to "Sheet0", "Sheet1", ...
    outfpath    : Output file path
                  (Example: '/path/to/file.xlsx')
    **kwargs    : other arguments passed on to DataFrame.to_excel()
    Returns: None
    Raises: ValueError if fewer sheet names than DataFrames are given
    """
    if sheet_names is None:
        sheet_names = ["Sheet" + str(i) for i in range(len(dfs))]
    elif len(sheet_names) < len(dfs):
        # Fail before any file is touched instead of half-way through writing.
        raise ValueError(
            "Got {} sheet names for {} DataFrames!".format(
                len(sheet_names), len(dfs)))

    # Leaving the with-block saves and closes the workbook, so no explicit
    # writer.save() is needed (that method no longer exists in pandas 2.x).
    with pd.ExcelWriter(outfpath, engine="openpyxl") as writer:
        for df, sheet_name in zip(dfs, sheet_names):
            df.to_excel(writer, sheet_name=sheet_name, **kwargs)
def withNames(*args):
    """
    Helper function to input: "Sheet_1", DataFrame_1, "Sheet_2", DataFrame_2, ...
    for dfs2xlsx(). And it will output (dfs, sheetnames) so that one can put
    it into parameter with asterisk.

    dfs2xlsx(*withNames("Sheet1", df1, "Sheet2", df2, ...),
             outfpath="test.xlsx")

    Parameters:
    alternating string and DataFrame (sheet name and data frame)
    Returns: DataFrames list, sheet names (strings list)
    Raises: ValueError if an odd number of arguments is given
    """
    # Explicit raise instead of assert: asserts are stripped under -O, which
    # would silently drop a trailing unpaired argument.
    if len(args) % 2 != 0:
        raise ValueError("Non even number of args!")
    # Even positions hold the sheet names, odd positions the DataFrames.
    sheet_names = list(args[0::2])
    dfs = list(args[1::2])
    return dfs, sheet_names
def xlsx2dfs(fpath, header=None, index_col=None, **kwargs):
    """
    Read Excel Sheets of an Excel file [fpath] into a list of DataFrames.
    The assumption is that the Excel Sheets contain pandas-readable DataFrame data.

    Parameters:
    fpath : Input Excel file path
    header, index_col, **kwargs : parameters passed to pd.read_excel() function
    Returns: Tuple of list of pandas data frames, list of sheet names
    """
    # The engine belongs to the workbook handle, not to the per-sheet reads.
    engine = kwargs.pop("engine", None)
    # Open the workbook once and close it reliably; every sheet is read from
    # the same handle instead of re-opening the file by path for each sheet.
    with pd.ExcelFile(fpath, engine=engine) as xls:
        sheet_names = list(xls.sheet_names)
        dfs = [pd.read_excel(xls, sheet_name=name, header=header,
                             index_col=index_col, **kwargs)
               for name in sheet_names]
    return dfs, sheet_names
""" | |
# Alternatively, this is also possible:
""" | |
def xlsx2dfs(fpath, header=None, index_col=None, **kwargs):
    """
    Read Excel Sheets of an Excel file [fpath] into a list of DataFrames.
    The assumption is that the Excel Sheets contain pandas-readable DataFrame data.

    Parameters:
    fpath : Input Excel file path
    header, index_col, **kwargs : parameters passed to pd.read_excel() function
    Returns: Tuple of list of pandas data frames, list of sheet names
    """
    # sheet_name=None makes read_excel return a dict {sheet name: DataFrame}.
    # header/index_col/kwargs are forwarded as the signature promises.
    sheets = pd.read_excel(fpath, sheet_name=None, header=header,
                           index_col=index_col, **kwargs)
    # Split the dict into two parallel lists; keys and values share one order.
    sheet_names = list(sheets.keys())
    dfs_only = list(sheets.values())
    return dfs_only, sheet_names
shorten drastically and remove sheet adding function.
change order of parameters as preparation for *withNames()
add import pandas as pd
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
add dfs2xlsx