
@chirdeeptomar
Created December 11, 2012 13:31
Finds duplicate rows in an Excel 2007 (.xlsx) workbook
from openpyxl import load_workbook
import os

dir_name = os.path.relpath(os.path.dirname(__file__))
file_name = os.path.join(dir_name, 'Data.xlsx')

unique_items = []
all_items = []


def add_to_list(item):
    if item not in unique_items:
        unique_items.append(item)


def find_duplicates(filename, has_header=False):
    wb = load_workbook(filename=filename, use_iterators=True)
    ws = wb.get_active_sheet()  # ws is now an IterableWorksheet
    for row in ws.iter_rows():  # iter_rows() streams one row at a time
        if has_header:
            has_header = False  # skip the header row only once
            continue
        item = ''
        for cell in row:
            # Only columns A and F form the duplicate key; extend as needed,
            # e.g. cell.column == 'J' or cell.column == 'L'
            if cell.column == 'A' or cell.column == 'F':
                # str() avoids a TypeError when the cell holds a number or is empty
                item += str(cell.internal_value) + '#'
        add_to_list(item)
        all_items.append(item)


find_duplicates(file_name, True)

total_duplicates = 0

print("Total Items: ", len(all_items))
print("Total Unique Items: ", len(unique_items))

for x in unique_items:
    if all_items.count(x) > 1:
        total_duplicates += 1
        print("Duplicate Item: ", x)

print("Total Duplicates Found: ", total_duplicates)
@zooid

zooid commented Jun 11, 2018

Hmm, it didn't work for me; it said it had found 27 unique items, but in fact there were 31.
Thanks for sharing though
