myuanz · August 26, 2024 08:50
diff --git a/read_visible_rows_to_dataframe.py b/read_visible_rows_to_dataframe.py
 def read_visible_rows_to_dataframe(file_path: str, sheet_name: str | None = None) -> pd.DataFrame:
     """
     Reads one sheet of an Excel file, ignores all hidden rows, and returns a pandas DataFrame.

     The workbook is first opened with openpyxl to find out which rows of the target
     sheet are flagged as hidden; the same sheet is then loaded with pandas and the
     hidden rows are dropped.

     :param file_path: Path to the Excel file
     :param sheet_name: Name of the sheet to read. If None (or empty), reads the active sheet.
     :return: A pandas DataFrame containing data from visible rows, re-indexed from 0
     :raises KeyError: If ``sheet_name`` is given but no such sheet exists in the workbook
     """

     # read_only=False is kept from the original; presumably the row-dimension
     # metadata used below is not exposed in read-only mode -- confirm before changing.
     wb_info = load_workbook(filename=file_path, read_only=False)
     try:
         # ``wb[name]`` replaces the deprecated ``get_sheet_by_name``.
         sheet_info = wb_info[sheet_name] if sheet_name else wb_info.active
         # Remember the resolved title so pandas loads exactly the sheet we inspected.
         target_title = sheet_info.title

         # Row numbers start from 1, with 1 being the header row. Since pandas starts
         # indexing from 0 at the first data row, sheet row r maps to DataFrame index r - 2.
         hidden_rows = {
             row_number - 2
             for row_number, row_info in sheet_info.row_dimensions.items()
             if row_info.hidden
         }
     finally:
         # Release the workbook even if sheet lookup or row inspection fails.
         wb_info.close()

     # Pass the sheet explicitly: without it pandas always reads the first sheet,
     # which would not match the hidden-row indices collected above.
     df = pd.read_excel(file_path, sheet_name=target_title, engine='openpyxl')
     return df[~df.index.isin(hidden_rows)].reset_index(drop=True)
	def read_visible_rows_to_dataframe(file_path: str, sheet_name: str | None = None) -> pd.DataFrame:
	    """
	    Reads one sheet of an Excel file, ignores all hidden rows, and returns a pandas DataFrame.

	    The workbook is first opened with openpyxl to find out which rows of the target
	    sheet are flagged as hidden; the same sheet is then loaded with pandas and the
	    hidden rows are dropped.

	    :param file_path: Path to the Excel file
	    :param sheet_name: Name of the sheet to read. If None (or empty), reads the active sheet.
	    :return: A pandas DataFrame containing data from visible rows, re-indexed from 0
	    :raises KeyError: If ``sheet_name`` is given but no such sheet exists in the workbook
	    """

	    # read_only=False is kept from the original; presumably the row-dimension
	    # metadata used below is not exposed in read-only mode -- confirm before changing.
	    wb_info = load_workbook(filename=file_path, read_only=False)
	    try:
	        # ``wb[name]`` replaces the deprecated ``get_sheet_by_name``.
	        sheet_info = wb_info[sheet_name] if sheet_name else wb_info.active
	        # Remember the resolved title so pandas loads exactly the sheet we inspected.
	        target_title = sheet_info.title

	        # Row numbers start from 1, with 1 being the header row. Since pandas starts
	        # indexing from 0 at the first data row, sheet row r maps to DataFrame index r - 2.
	        hidden_rows = {
	            row_number - 2
	            for row_number, row_info in sheet_info.row_dimensions.items()
	            if row_info.hidden
	        }
	    finally:
	        # Release the workbook even if sheet lookup or row inspection fails.
	        wb_info.close()

	    # Pass the sheet explicitly: without it pandas always reads the first sheet,
	    # which would not match the hidden-row indices collected above.
	    df = pd.read_excel(file_path, sheet_name=target_title, engine='openpyxl')
	    return df[~df.index.isin(hidden_rows)].reset_index(drop=True)
No results found