Last active
March 23, 2016 20:50
-
-
Save arvidfm/b9a970c6ce9dd52311f8 to your computer and use it in GitHub Desktop.
Fetch course lists from kth.se
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
import asyncio | |
import re | |
import sys | |
import aiohttp | |
import lxml.html | |
async def get_periods(session, course):
    """Fetch the course page for *course* and return its round info strings.

    The English course page on kth.se is downloaded through *session* and
    parsed with lxml. For every block under ``courseRoundBlocks`` that is not
    headed by a ``CANCELLED`` span, the text of the first ``infoset`` list
    item is collected (presumably the period description, e.g. containing
    "P1" -- the caller searches these strings for ``P<digit>``).

    Args:
        session: an object whose ``get(url)`` is an async context manager
            yielding a response with an awaitable ``text()``.
        course: course code inserted into the course page URL.

    Returns:
        A list of whitespace-stripped strings, one per matched text node.
    """
    url = "https://www.kth.se/student/kurser/kurs/{}?l=en".format(course)
    async with session.get(url) as response:
        page = lxml.html.fromstring(await response.text())

    # First <li> of each non-cancelled round's "infoset" list.
    round_info = page.xpath(
        ".//div[@id='courseRoundBlocks']/div[not(h3/a/span[text()='CANCELLED'])]/"
        "div/ul[@class='infoset']/li[1]/p/text()")

    stripped = []
    for entry in round_info:
        stripped.append(entry.strip())
    return stripped
async def main():
    """Parse command-line arguments, fetch a course list and print the matches.

    The course category, credit bounds and optional period are read from the
    command line via argparse. The category's course table is downloaded from
    kth.se, filtered by level ("First cycle"/"Second cycle"), credits and
    period, and the surviving courses are printed sorted by credits.
    Second-cycle courses are prefixed with ``*``.

    Returns:
        1 if the course list page did not answer with HTTP 200; otherwise
        None (which ``sys.exit`` treats as success).
    """

    def credit_type(string):
        """argparse ``type``: turn "7" or "7.5" into a ``(whole, fraction)`` int tuple."""
        match = re.match(r"^(\d+)(?:\.(\d+))?$", string)
        if match is None:
            raise argparse.ArgumentTypeError(
                "{} is not a valid amount of credits".format(string))
        whole, fraction = match.groups()
        # A missing fractional part counts as zero.
        return int(whole), int(fraction or 0)

    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('category', help="course category (e.g. DD, DT, SF)")
    parser.add_argument('--max-credits', default="30.0", type=credit_type,
                        help="do not include courses that comprise more "
                             "than the given amount of credits")
    parser.add_argument('--min-credits', default="0.0", type=credit_type,
                        help="do not include courses that comprise less "
                             "than the given amount of credits")
    parser.add_argument('--period', choices=['P1', 'P2', 'P3', 'P4'],
                        help="the period the course is held")
    args = parser.parse_args()

    # Compiled once; used for every course's period strings.
    period_code = re.compile(r"P\d")

    # ClientSession has to be entered with ``async with``; the plain ``with``
    # form is rejected by current aiohttp releases.
    async with aiohttp.ClientSession() as session:
        list_url = "https://www.kth.se/student/kurser/org/{}?l=en".format(args.category)
        async with session.get(list_url) as r:
            if r.status != 200:
                print("Error: Couldn't fetch course list. Did you supply a valid category?")
                return 1
            doc = lxml.html.fromstring(await r.text())

        courses = []
        course_list = doc.xpath(".//div[@id='searchResult']/table/tbody/tr")
        print("Fetching {} courses...".format(len(course_list)))

        for tr in course_list:
            # Each row is expected to hold exactly four non-empty text cells.
            name, credits, code, level = [col.strip()
                                          for col in tr.xpath("./td//text()")
                                          if len(col.strip()) > 0]

            # Always build a 2-tuple so that a value without a decimal part
            # ("6") compares correctly with the argparse bounds and fills both
            # "{}.{}" placeholders below.
            # NOTE: the fraction is compared as an integer, so this assumes
            # one-digit fractions (e.g. ".0"/".5").
            whole, _, fraction = credits.partition(".")
            credits = int(whole), int(fraction or 0)

            if level not in ('First cycle', 'Second cycle'):
                continue
            if credits < args.min_credits or credits > args.max_credits:
                continue

            periods = await get_periods(session, code)

            if args.period is not None:
                # Only the first "P<digit>" occurrence of each string counts.
                found = (period_code.search(period) for period in periods)
                if not any(m is not None and m.group() == args.period for m in found):
                    continue

            # De-duplicate; sort so the output does not depend on set ordering.
            periods = "; ".join(period for period in sorted(set(periods))
                                if args.period is None or args.period in period)
            if not periods:
                periods = "[This course is not scheduled to be offered.]"

            courses.append((credits, "{}[{}] ({}.{} hp) {}; {}".format(
                "*" if level == "Second cycle" else " ", code, *credits, name, periods)))

    if not courses:
        print("No matching courses found.")
    else:
        courses.sort()
        print("\n".join(s for _, s in courses))
if __name__ == "__main__":
    # Create the loop explicitly: asyncio.get_event_loop() without a current
    # loop is deprecated and fails on the newest Python releases.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        exit_code = loop.run_until_complete(main())
    finally:
        # Release the loop's resources even if main() raises or exits.
        loop.close()
    sys.exit(exit_code)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment