Created
February 16, 2025 11:13
-
-
Save eruffaldi/82360ce0faf6df1eed7e9207af2f680e to your computer and use it in GitHub Desktop.
Remove Strikeouts from Excel in Python
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Given an Excel file, removes struck-through (strikeout) text
# Emanuele Ruffaldi MMI 2025
# Requires openpyxl
# Tested with Python 3.10.12 and openpyxl 3.1.4
#
import sys | |
from openpyxl import load_workbook | |
from openpyxl.styles import Font | |
from openpyxl.cell.rich_text import Text, CellRichText | |
import shutil | |
def _run_is_struck(run):
    """Return True if a rich-text run carries its own strikeout font."""
    return bool(run.font and run.font.strike)


def remove_strikeout_text(wb):
    """Remove struck-through content from every worksheet of *wb*, in place.

    For each cell:

    * Rich-text values lose every run that is struck through.  Plain ``str``
      runs have no font of their own, so they are treated as struck when the
      cell font itself is struck.  What remains is stored as ``""`` (nothing
      left), a plain string (only ``str`` runs left) or a new
      ``CellRichText``.
    * Any other value in a cell whose font is struck is cleared: strings
      become ``""``, everything else becomes ``None``.
    * The strike flag is removed from the cell font.

    Args:
        wb: workbook object exposing ``worksheets``; it should have been
            loaded with ``rich_text=True`` so that per-run formatting is
            visible.

    Returns:
        True if at least one cell value or font was modified, else False.
    """
    changed = False
    for sheet in wb.worksheets:
        for row in sheet.iter_rows():
            for cell in row:
                cell_strike = bool(cell.font and cell.font.strike)
                value = cell.value
                if isinstance(value, CellRichText):
                    # list() keeps TextBlock objects; as_list() would return str
                    runs = list(value)
                    if cell_strike:
                        # Plain strings inherit the struck cell font: drop
                        # them and keep only the non-struck TextBlocks.
                        kept = [
                            run for run in runs
                            if not isinstance(run, str) and not _run_is_struck(run)
                        ]
                    else:
                        kept = [
                            run for run in runs
                            if isinstance(run, str) or not _run_is_struck(run)
                        ]
                    if len(kept) != len(runs):
                        # Collapse to: empty, simple string, or CellRichText
                        if not kept:
                            cell.value = ""
                        elif all(isinstance(run, str) for run in kept):
                            cell.value = "".join(kept)
                        else:
                            cell.value = CellRichText(kept)
                        changed = True
                elif value is not None and cell_strike:
                    # Explicit None check: a truthiness test would leave
                    # struck-through 0 / 0.0 / False values in place, and
                    # they would then reappear un-struck below.
                    cell.value = "" if isinstance(value, str) else None
                    changed = True
                # Remove strike from the font regardless of the value
                if cell_strike:
                    cell.font = Font(**{**cell.font.__dict__, "strike": False})
                    changed = True
    return changed
def main():
    """Command-line entry point: ``inputfile [outputfile]``.

    Reads the arguments from ``sys.argv``.  Without an input file a usage
    message is printed to stderr and nothing else happens.  When no output
    file is given, the result is written to ``<inputfile>.cleaned.xlsx``.

    If any strikeout was removed the modified workbook is saved to the
    output path; otherwise the input file is copied there unchanged.
    """
    if len(sys.argv) == 1:
        print("Expected: inputfile [outputfile]", file=sys.stderr)
        return

    infile = sys.argv[1]
    outfile = sys.argv[2] if len(sys.argv) >= 3 else infile + ".cleaned.xlsx"

    wb = load_workbook(infile, rich_text=True)
    if remove_strikeout_text(wb):
        wb.save(outfile)
        return

    # Nothing was struck through: copy the original (copy2 keeps metadata)
    print("Full copy no strike", file=sys.stderr)
    try:
        shutil.copy2(infile, outfile)
    except shutil.SameFileError:
        # Output path is the input itself, which is already the correct
        # result, so there is nothing to copy.
        pass


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment