Created
August 26, 2024 08:50
-
-
Save myuanz/e8eb80deb47de8416c46ec4052c04101 to your computer and use it in GitHub Desktop.
Read an .xlsx file into a pandas DataFrame while respecting Excel filter conditions (rows hidden in Excel are skipped).
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def read_visible_rows_to_dataframe(file_path: str, sheet_name: str | None = None) -> pd.DataFrame:
    """
    Read one sheet of an Excel file, ignore all hidden rows, and return a pandas DataFrame.

    The hidden flags are taken from the same sheet that is loaded into the
    DataFrame, so the two always refer to the same rows.

    :param file_path: Path to the Excel file
    :param sheet_name: Name of the sheet to read. If None, reads the active sheet.
    :return: A pandas DataFrame containing data from visible rows, re-indexed from 0
    :raises KeyError: If ``sheet_name`` does not exist in the workbook
    """
    # Full (non read-only) mode is required: row_dimensions, which carries the
    # per-row "hidden" flag, is not populated for read-only worksheets.
    workbook = load_workbook(filename=file_path, read_only=False)
    try:
        if sheet_name is not None:
            # Item access is the supported lookup; get_sheet_by_name is deprecated.
            sheet = workbook[sheet_name]
        else:
            sheet = workbook.active

        # Remember which sheet was inspected so pandas reads the very same one.
        target_title = sheet.title

        # Row numbers start from 1, with 1 being the header row. Since pandas
        # starts indexing data rows from 0, subtract 2 to align the numbers.
        hidden_rows = {
            row_number - 2
            for row_number, dimension in sheet.row_dimensions.items()
            if dimension.hidden
        }
    finally:
        # Release the workbook even if the sheet lookup fails.
        workbook.close()

    df = pd.read_excel(file_path, sheet_name=target_title, engine='openpyxl')

    # Build a fresh frame instead of resetting the index of a slice in place,
    # which can trigger pandas' SettingWithCopyWarning.
    return df[~df.index.isin(hidden_rows)].reset_index(drop=True)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment