Skip to content

Instantly share code, notes, and snippets.

@sbassett29
Created July 5, 2023 14:19
Show Gist options
  • Save sbassett29/f8155bc39797c9dbf30777c269026f0f to your computer and use it in GitHub Desktop.
Save sbassett29/f8155bc39797c9dbf30777c269026f0f to your computer and use it in GitHub Desktop.
Creates a list of likely wmf git author names and emails from ldap.toolforge.org
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# License: Apache 2.0
import csv
import sys
import time

import requests
from lxml import html
# Scrape the member list of the "wmf" group on ldap.toolforge.org, visit each
# member's page to collect a display name and e-mail address, and write the
# resulting name/e-mail pairs to wmf_staff.csv in the current directory.

# Root of the LDAP browsing tool; hrefs scraped from the group page are
# appended to this to form each user's page URL.
ldap_base_url = "https://ldap.toolforge.org"
# Group page whose member links are scraped.
wmf_ldap_url = "https://ldap.toolforge.org/group/wmf"

# Name -> e-mail pairs merged over the scraped results just before writing,
# so they add to (or override) whatever was scraped under the same name.
# NOTE(review): every address below is the same "[email protected]" literal
# in this copy, which looks like a redaction placeholder -- confirm and
# restore the intended values before relying on the output.
edge_cases = {
    "Timo Tijhof": "[email protected]",
    "C. Scott Ananian": "[email protected]",
    "seddon": "[email protected]",
    "Bartosz Dziewoński": "[email protected]",
}

# Fetch the group page. Stop with a clear message on a non-200 answer instead
# of falling through to a NameError on an undefined user_urls further down.
http_response = requests.get(wmf_ldap_url, timeout=5)
if http_response.status_code != 200:
    raise SystemExit(
        "could not fetch {}: HTTP {}".format(
            wmf_ldap_url, http_response.status_code
        )
    )
doc = html.fromstring(http_response.text)
# The href of the second link inside each list item is used as the user page.
user_urls = doc.xpath("//body//ul//li//a[2]/@href")
user_urls = [ldap_base_url + ueu for ueu in user_urls]

users = {}
# Debug aid: a positive value stops the loop early once the index exceeds it;
# 0 disables the limit.
# NOTE(review): the check runs after the page at index i was processed and
# uses ">", so slightly more than count_break pages are fetched -- confirm
# whether that is intended.
count_break = 0
for i, ueu in enumerate(user_urls):
    try:
        http_response = requests.get(ueu, timeout=60)
    except requests.RequestException as err:
        # One unreachable page must not discard everything scraped so far.
        print("skipping {}: {}".format(ueu, err), file=sys.stderr)
    else:
        if http_response.status_code == 200:
            doc = html.fromstring(http_response.text)
            user_name = doc.xpath("//body//ul[1]//li[1]//a[1]//text()")
            user_email = doc.xpath("//body//ul[1]//li[3]//code[1]//text()")
            # Drop the first five characters of the link text.
            # NOTE(review): presumably a fixed label prefix on the page --
            # confirm against the live markup.
            name = "".join(user_name)[5:]
            # Skip pages where no name was found; otherwise every such page
            # would overwrite the same empty-string key.
            if name:
                # The page shows addresses as "user at host dot tld".
                users[name] = (
                    "".join(user_email)
                    .replace(" at ", "@")
                    .replace(" dot ", ".")
                )
    time.sleep(1)  # toolforge.org wasn't happy at times
    if count_break > 0 and i > count_break:
        break

# add edge cases
users.update(edge_cases)

# write to csv
# newline="" is what the csv module expects (it emits its own line endings);
# utf-8 keeps non-ASCII names writable regardless of the locale.
with open("wmf_staff.csv", "w", newline="", encoding="utf-8") as csvfile:
    # QUOTE_MINIMAL leaves plain fields unquoted, exactly as before, but
    # quotes a field containing a comma or quote instead of raising csv.Error
    # the way QUOTE_NONE does when no escapechar is set.
    writer = csv.writer(
        csvfile, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL
    )
    writer.writerows(users.items())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment