Created
July 5, 2023 14:19
-
-
Save sbassett29/f8155bc39797c9dbf30777c269026f0f to your computer and use it in GitHub Desktop.
Creates a list of likely wmf git author names and emails from ldap.toolforge.org
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# License: Apache 2.0
import csv
import sys
import time

import requests
from lxml import html
# Root of the LDAP browser; hrefs scraped from its pages are appended to it.
ldap_base_url = "https://ldap.toolforge.org"
# Listing page for the "wmf" group, used as the crawl's starting point.
wmf_ldap_url = ldap_base_url + "/group/wmf"

# Hand-maintained name -> email pairs that are merged over the scraped
# results just before the CSV is written, so they win on a name clash.
# NOTE(review): every address below reads "[email protected]", which looks
# like a redaction placeholder rather than a real address -- restore the real
# values before relying on this table.
edge_cases = {
    "Timo Tijhof": "[email protected]",
    "C. Scott Ananian": "[email protected]",
    "seddon": "[email protected]",
    "Bartosz Dziewoński": "[email protected]",
}
# Step 1: fetch the group listing and collect one URL per linked member page.
http_response = requests.get(wmf_ldap_url, timeout=5)
if http_response.status_code != 200:
    # Without the listing there is nothing to crawl; stop with a clear
    # message instead of failing later on an unbound user_urls.
    sys.exit(
        f"could not fetch {wmf_ldap_url}: HTTP {http_response.status_code}"
    )
doc = html.fromstring(http_response.text)
# The selected hrefs are prefixed with the base URL (presumably they are
# site-relative paths -- confirm against the page markup).
user_urls = [
    ldap_base_url + href for href in doc.xpath("//body//ul//li//a[2]/@href")
]

# Step 2: visit every member page and record name -> email.
users = {}
# Debug limit: 0 crawls everything; a positive value stops the loop early.
# The check runs after a page is processed and uses ">", so the loop ends
# once the index exceeds this value.
count_break = 0
for i, ueu in enumerate(user_urls):
    try:
        http_response = requests.get(ueu, timeout=60)
    except requests.RequestException as exc:
        # One flaky page should not throw away the pages already collected.
        print(f"skipping {ueu}: {exc}", file=sys.stderr)
    else:
        if http_response.status_code == 200:
            doc = html.fromstring(http_response.text)
            user_name = doc.xpath("//body//ul[1]//li[1]//a[1]//text()")
            user_email = doc.xpath("//body//ul[1]//li[3]//code[1]//text()")
            # Drop the first five characters of the joined link text
            # (presumably a fixed label prefix -- confirm against the page).
            name = "".join(user_name)[5:]
            if name:
                # Addresses are written out as "... at ... dot ..."; turn
                # them back into the usual user@host.tld form.
                users[name] = (
                    "".join(user_email)
                    .replace(" at ", "@")
                    .replace(" dot ", ".")
                )
            else:
                # An empty key would only produce a junk CSV row.
                print(f"skipping {ueu}: no user name found", file=sys.stderr)
        else:
            print(
                f"skipping {ueu}: HTTP {http_response.status_code}",
                file=sys.stderr,
            )
    time.sleep(1)  # toolforge.org wasn't happy at times
    if count_break > 0 and i > count_break:
        break

# add edge cases
users.update(edge_cases)

# write to csv
# newline="" lets the csv module control line endings, and the explicit
# encoding keeps non-ASCII names portable across platforms.
with open("wmf_staff.csv", "w", newline="", encoding="utf-8") as csvfile:
    # QUOTE_MINIMAL only quotes fields that need it; QUOTE_NONE without an
    # escapechar raises csv.Error as soon as a field contains a comma or quote.
    writer = csv.writer(
        csvfile, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL
    )
    writer.writerows(users.items())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment