Skip to content

Instantly share code, notes, and snippets.

@myuanz
Created August 26, 2024 08:50
Show Gist options
  • Save myuanz/e8eb80deb47de8416c46ec4052c04101 to your computer and use it in GitHub Desktop.
Save myuanz/e8eb80deb47de8416c46ec4052c04101 to your computer and use it in GitHub Desktop.
Read an xlsx file while respecting Excel filter conditions (hidden rows are skipped).
def read_visible_rows_to_dataframe(file_path: str, sheet_name: str|None=None) -> pd.DataFrame:
    """
    Read one sheet of an Excel file into a pandas DataFrame, skipping hidden rows.

    Rows that the worksheet marks as hidden (for example, rows filtered out in
    Excel) are dropped. The first row of the sheet is treated as the header.

    :param file_path: Path to the Excel file
    :param sheet_name: Name of the sheet to read. If None (or empty), reads the active sheet.
    :return: A pandas DataFrame containing data from visible rows, re-indexed from 0
    :raises KeyError: If ``sheet_name`` is given but no such sheet exists
    """
    # Kept as a full (non-read-only) load, as in the original; presumably because
    # per-row metadata such as ``hidden`` is not exposed in read-only mode.
    wb_info = load_workbook(filename=file_path, read_only=False)
    try:
        # ``wb[name]`` replaces the deprecated ``get_sheet_by_name``.
        sheet_info = wb_info[sheet_name] if sheet_name else wb_info.active
        # Remember which sheet was inspected so pandas reads the very same one.
        target_sheet = sheet_info.title
        # Row numbers start from 1, with 1 being the header row. Since pandas starts
        # indexing from 0, we need to subtract 2 to align the row numbers correctly.
        hidden_rows = {
            row_number - 2
            for row_number, row_info in sheet_info.row_dimensions.items()
            if row_info.hidden
        }
    finally:
        # Release the workbook even if the sheet lookup above fails.
        wb_info.close()

    # Pass the sheet explicitly: without it pandas always reads the first sheet,
    # which may not be the sheet the hidden rows were collected from.
    df = pd.read_excel(file_path, sheet_name=target_sheet, engine='openpyxl')
    # Non-inplace reset avoids mutating a filtered slice of ``df``.
    return df[~df.index.isin(hidden_rows)].reset_index(drop=True)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment