Created
May 13, 2021 08:22
-
-
Save BroHui/57460d9caae37e6ce74927859295adb7 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
import warnings | |
import string | |
from openpyxl import Workbook, load_workbook | |
BLANK_LINES = 3 | |
class ExcelBook(object): | |
def __init__(self): | |
self.parse_mode = 1 | |
self.wb = None | |
self.sheets = [] | |
self.ws = None | |
self.cols = [] | |
self.row_range = [] | |
self.headers = [] | |
def load(self, xlsx_file_name=""): | |
if not xlsx_file_name: | |
warnings.warn("Missing xlsx file name.") | |
return | |
self.wb = load_workbook(xlsx_file_name) | |
self.sheets = self.wb.sheetnames | |
self.ws = self.wb.active | |
# peek the rows and cols at the excel | |
_, _, self.cols = self.get_cols_range() | |
row_start, row_end, _ = self.get_rows_range() | |
self.row_range = (row_start, row_end) | |
# try to load headers | |
self.headers = self.get_headers() | |
def get_cols_range(self): | |
""" | |
excel cols range peek | |
:return: | |
""" | |
col_start = string.ascii_uppercase[0] | |
col_end = '' | |
col_array = [] | |
blank_col_count = 0 | |
for alphabet in string.ascii_uppercase: | |
if self.ws['{}1'.format(alphabet)].value: | |
blank_col_count = 0 | |
col_array.append(alphabet) | |
col_end = alphabet | |
continue | |
# blank col, start counting | |
blank_col_count += 1 | |
if blank_col_count == 3: | |
break | |
return col_start, col_end, col_array | |
def get_rows_range(self, skip_table_headers=1, the_first_col='A'): | |
""" | |
excel rows range peek | |
default table header rows set as one. | |
:return: | |
""" | |
row_start = 1 + skip_table_headers | |
row_end = 1 + skip_table_headers | |
row_array = [] | |
blank_row_count = 0 | |
while 1: | |
pos_tag = '{}{}'.format(the_first_col, row_end) | |
if self.ws[pos_tag].value: | |
blank_row_count = 0 | |
row_array.append(pos_tag) | |
row_end += 1 | |
continue | |
# blank row, start counting | |
blank_row_count += 1 | |
if blank_row_count == 3: | |
row_end -= 1 | |
break | |
return row_start, row_end, row_array | |
def get_headers(self, headers_row=1, headers_blocks=[]): | |
headers = [] | |
# _, _, col_range = self.get_cols_range() | |
for col in self.cols: | |
header_pos = "{}{}".format(col, headers_row) | |
cell_value = self.ws[header_pos].value | |
if not cell_value: | |
continue | |
headers.append(cell_value) | |
print(headers) | |
return headers | |
def get_data(self): | |
data = [] | |
row_start, row_end = self.row_range | |
for i in range(row_start, row_end+1): | |
single_data = {} | |
for k in self.cols: | |
pos = "{}{}".format(k, i) | |
cell_value = self.ws[pos].value or "" | |
t_h = self.headers[self.cols.index(k)] | |
single_data[t_h] = cell_value | |
data.append(single_data) | |
return data | |
class SimpleExcelBook(ExcelBook): | |
def __init__(self): | |
super(SimpleExcelBook, self).__init__() | |
if __name__ == '__main__': | |
book = SimpleExcelBook() | |
book.load('EnglishWordSmaple.xlsx') | |
book.get_data() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment