ara303 · October 15, 2024 02:22
diff --git a/close-match-json.py b/close-match-json.py
 import json
 from difflib import get_close_matches
 import os

 # `color` is a Windows cmd.exe built-in; running it with no arguments is a
 # commonly used trick to get the Windows console to honour ANSI escape
 # sequences. Other platforms have no such command, so calling it there only
 # spawns a shell that reports "command not found" -- skip it.
 if os.name == "nt":
    os.system("color")

 class clrs:
    """Namespace of ANSI escape sequences used to colour the console report.

    Each value has the form ESC [ <code> m. Wrap text as
    f"{clrs.X}text{clrs.ENDC}" so the colour is switched off again afterwards.
    """
    # Foreground colours (codes 9x are the "bright" variants on ANSI terminals).
    OKBLUE = '\033[94m'   # not referenced by the visible script
    OKCYAN = '\033[96m'   # not referenced by the visible script
    OKGREEN = '\033[92m'  # label colour for exact matches
    WARNING = '\033[93m'  # label colour for fuzzy (title / link text) matches
    FAIL = '\033[91m'     # label colour for contacts with no match
    # Code 0 resets all attributes back to the terminal default.
    ENDC = '\033[0m'

 # Read the contact records. 'utf-8-sig' decodes UTF-8 and silently drops a
 # leading byte-order mark if the file starts with one.
 # The matching loop further down indexes every element with ['title'].
 with open('input_1.json', 'r', encoding='utf-8-sig') as f:
    contacts = json.load(f)

 # Read the records the contacts are matched against. The script later indexes
 # each element with ['title'] and, where the key exists, ['link_text'].
 with open('input_2.json', 'r', encoding='utf-8-sig') as f:
    data = json.load(f)

 def get_best_match(contact_title, data_titles, threshold=0.7):
    """Return the candidate most similar to *contact_title*, or None.

    Args:
        contact_title: The string to look up.
        data_titles: The candidate strings. Anything whose iteration yields
            the candidates is accepted: a dict keyed by candidate (which is
            what this script passes) or a plain list/set of strings.
        threshold: Minimum difflib similarity ratio, between 0 and 1, that a
            candidate must reach to count as a match.

    Returns:
        The single best-scoring candidate, or None when no candidate reaches
        *threshold*.
    """
    # Iterating a dict yields its keys, so an explicit .keys() call is not
    # needed; leaving it out also lets callers pass a non-mapping iterable.
    matches = get_close_matches(contact_title, data_titles, n=1, cutoff=threshold)
    return matches[0] if matches else None

 # Result buckets: contacts that were paired with a data record (already
 # merged into one dict each) and contacts for which nothing was found.
 merged_entries = []
 unmatched_entries = []

 # Lookup tables over the second data set. Each maps a string to the full
 # record it came from: one keyed by 'title', one keyed by 'link_text' (only
 # records that have that key). If two records share a key, the later one wins.
 data_titles = {record['title']: record for record in data}
 data_link_texts = {record['link_text']: record for record in data if 'link_text' in record}

 for contact in contacts:
    contact_title = contact['title']

    # Step 1: identical title.
    if contact_title in data_titles:
        print(f"{clrs.OKGREEN}Exact{clrs.ENDC}: {contact_title} -> {data_titles[contact_title]['title']}")
        merged_entries.append({**contact, **data_titles[contact_title]})
        continue

    # Step 2: closest title above the similarity threshold.
    best_match_title = get_best_match(contact_title, data_titles)
    if best_match_title:
        print(f"{clrs.WARNING}Title{clrs.ENDC}: {contact_title} -> {best_match_title}")
        merged_entries.append({**contact, **data_titles[best_match_title]})
        continue

    # Step 3: closest link text; if that fails too the contact is unmatched.
    best_match_link = get_best_match(contact_title, data_link_texts)
    if not best_match_link:
        unmatched_entries.append(contact)
        continue
    print(f"{clrs.WARNING} Link{clrs.ENDC}: {contact_title} -> {best_match_link}")
    merged_entries.append({**contact, **data_link_texts[best_match_link]})

 # List the leftovers last so they appear together at the end of the report.
 for item in unmatched_entries:
    print(f"{clrs.FAIL} None{clrs.ENDC}: {item['title']}")

 # UNCOMMENT THIS IF YOU ACTUALLY WANT IT TO GENERATE THE FILE
 # with open('output.json', 'w', encoding='utf-8') as f:
 #     json.dump(merged_entries, f, ensure_ascii=False, indent=4)
	import json
	from difflib import get_close_matches
	import os

	# `color` is a Windows cmd.exe built-in; running it with no arguments is a
	# commonly used trick to get the Windows console to honour ANSI escape
	# sequences. Other platforms have no such command, so calling it there only
	# spawns a shell that reports "command not found" -- skip it.
	if os.name == "nt":
	    os.system("color")

	class clrs:
	    """Namespace of ANSI escape sequences used to colour the console report.

	    Each value has the form ESC [ <code> m. Wrap text as
	    f"{clrs.X}text{clrs.ENDC}" so the colour is switched off again afterwards.
	    """
	    # Foreground colours (codes 9x are the "bright" variants on ANSI terminals).
	    OKBLUE = '\033[94m'
	    OKCYAN = '\033[96m'
	    OKGREEN = '\033[92m'  # label colour for exact matches
	    WARNING = '\033[93m'  # label colour for fuzzy (title / link text) matches
	    FAIL = '\033[91m'     # label colour for contacts with no match
	    # Code 0 resets all attributes back to the terminal default.
	    ENDC = '\033[0m'

	# Read the contact records. 'utf-8-sig' decodes UTF-8 and silently drops a
	# leading byte-order mark if the file starts with one.
	with open('input_1.json', 'r', encoding='utf-8-sig') as f:
	    contacts = json.load(f)

	# Read the records the contacts are matched against.
	with open('input_2.json', 'r', encoding='utf-8-sig') as f:
	    data = json.load(f)

	def get_best_match(contact_title, data_titles, threshold=0.7):
	    """Return the candidate most similar to *contact_title*, or None.

	    Args:
	        contact_title: The string to look up.
	        data_titles: The candidate strings. Anything whose iteration yields
	            the candidates is accepted: a dict keyed by candidate (which is
	            what this script passes) or a plain list/set of strings.
	        threshold: Minimum difflib similarity ratio, between 0 and 1, that a
	            candidate must reach to count as a match.

	    Returns:
	        The single best-scoring candidate, or None when no candidate reaches
	        *threshold*.
	    """
	    # Iterating a dict yields its keys, so an explicit .keys() call is not
	    # needed; leaving it out also lets callers pass a non-mapping iterable.
	    matches = get_close_matches(contact_title, data_titles, n=1, cutoff=threshold)
	    return matches[0] if matches else None

	# Result buckets: contacts that were paired with a data record (already
	# merged into one dict each) and contacts for which nothing was found.
	merged_entries = []
	unmatched_entries = []

	# Lookup tables over the second data set. Each maps a string to the full
	# record it came from: one keyed by 'title', one keyed by 'link_text' (only
	# records that have that key). If two records share a key, the later one wins.
	data_titles = {entry['title']: entry for entry in data}
	data_link_texts = {entry['link_text']: entry for entry in data if 'link_text' in entry}

	for contact in contacts:
	    contact_title = contact['title']

	    if contact_title in data_titles:
	        print(f"{clrs.OKGREEN}Exact{clrs.ENDC}: {contact_title} -> {data_titles[contact_title]['title']}")
	        # {**a, **b} builds ONE merged dict (b's values win on shared keys).
	        # A bare {a, b} would be a set literal and raise
	        # "TypeError: unhashable type: 'dict'".
	        merged_entries.append({**contact, **data_titles[contact_title]})
	    else:
	        # Fuzzy matching based on title
	        best_match_title = get_best_match(contact_title, data_titles)

	        if best_match_title:
	            print(f"{clrs.WARNING}Title{clrs.ENDC}: {contact_title} -> {best_match_title}")
	            merged_entries.append({**contact, **data_titles[best_match_title]})
	        else:
	            # Fuzzy matching based on link text
	            best_match_link = get_best_match(contact_title, data_link_texts)
	            if best_match_link:
	                print(f"{clrs.WARNING} Link{clrs.ENDC}: {contact_title} -> {best_match_link}")
	                merged_entries.append({**contact, **data_link_texts[best_match_link]})
	            else:
	                unmatched_entries.append(contact)

	# List the leftovers last so they appear together at the end of the report.
	for item in unmatched_entries:
	    print(f"{clrs.FAIL} None{clrs.ENDC}: {item['title']}")

	# UNCOMMENT THIS IF YOU ACTUALLY WANT IT TO GENERATE THE FILE
	# with open('output.json', 'w', encoding='utf-8') as f:
	#     json.dump(merged_entries, f, ensure_ascii=False, indent=4)