Created
June 27, 2023 09:13
-
-
Save dadatuputi/e4babdfdcdc5594418be5bc6e3caec27 to your computer and use it in GitHub Desktop.
Convert a GEDCOM (.ged) file into frequency-ranked lists of first and last names - useful when looking through a family tree for baby names
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re
import string

from gedcom.element.individual import IndividualElement
from gedcom.parser import Parser
def valid_name(name) -> bool:
    """Return True when *name* is usable as a name entry.

    A name is accepted only if it is non-empty and, once surrounding
    whitespace has been stripped, at least three characters long.
    ``None``, empty strings and whitespace-only strings are rejected.
    """
    if not name:
        return False
    # Measure the stripped text so padding cannot push a short fragment
    # over the minimum length; whitespace-only input strips to length 0.
    return len(name.strip()) >= 3
# Matches runs of non-word characters and underscores; these are removed from
# every name part. Raw string so that ``\W`` is passed to the regex engine
# instead of being treated as a (invalid) string escape.
pattern = re.compile(r'[\W_]+')

file_path = 'out.ged'

gedcom_parser = Parser()
gedcom_parser.parse_file(file_path)
root_child_elements = gedcom_parser.get_root_child_elements()

# name -> number of occurrences
first_names = {}
last_names = {}
discarded = {}  # never populated below; kept so the module-level name still exists

# Iterate through all root child elements
for element in root_child_elements:
    # Only `IndividualElement` records provide `get_name`; skip everything else.
    if not isinstance(element, IndividualElement):
        continue

    # Unpack the name tuple
    (firsts, last) = element.get_name()

    # The first component may hold several space-separated names;
    # each one is cleaned and counted on its own.
    for first in firsts.split(' '):
        first = pattern.sub('', first).capitalize()
        if valid_name(first):
            first_names[first] = first_names.get(first, 0) + 1

    # NOTE(review): the last name is counted once per individual (outside the
    # loop above) -- assumed intent, confirm against the original layout.
    last = pattern.sub('', last).capitalize()
    if valid_name(last):
        last_names[last] = last_names.get(last, 0) + 1

# Rebind the dicts to lists of (name, count) pairs, most frequent first.
first_names = sorted(first_names.items(), key=lambda x: x[1], reverse=True)
last_names = sorted(last_names.items(), key=lambda x: x[1], reverse=True)

# Explicit UTF-8 so names containing non-ASCII letters are written the same
# way on every platform instead of depending on the locale's default encoding.
with open('first.txt', 'w', encoding='utf-8') as f:
    for first, num in first_names:
        f.write("{}: {}\n".format(first, num))

with open('last.txt', 'w', encoding='utf-8') as f:
    for last, num in last_names:
        f.write("{}: {}\n".format(last, num))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment