Created
February 26, 2024 06:45
-
-
Save LukasWoodtli/4d9d8d5d3b5d95bfddbcc50df172ee11 to your computer and use it in GitHub Desktop.
Get the table of contents for a Coursera course
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Print the table of contents of a Coursera course page.

Downloads the course landing page and prints the first direct text node of
every ``<h3>`` element and of every element whose ``data-test-id``
attribute equals ``item-view``.
"""

import requests
from bs4 import BeautifulSoup

COURSE_URL = "https://www.coursera.org/learn/dsp4?specialization=digital-signal-processing"
# Seconds to wait for the server; without a timeout requests may block forever.
REQUEST_TIMEOUT = 30


def is_toc_entry(tag):
    """Return True if *tag* is an ``<h3>`` or carries ``data-test-id="item-view"``.

    NOTE(review): presumably the ``<h3>`` elements are the section headings and
    the ``item-view`` elements the individual entries -- confirm against the
    current Coursera markup.
    """
    return tag.name == 'h3' or tag.get('data-test-id') == 'item-view'


def main():
    """Fetch the course page and print one line per matching element.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.RequestException: on connection problems or timeout.
    """
    response = requests.get(COURSE_URL, timeout=REQUEST_TIMEOUT)
    # Fail loudly on 4xx/5xx instead of silently parsing an error page.
    response.raise_for_status()

    # Name the parser explicitly so the result does not depend on which
    # optional parsers (lxml, html5lib) happen to be installed.
    soup = BeautifulSoup(response.text, 'html.parser')

    for tag in soup.find_all(is_toc_entry):
        # Only strings that are direct children of the tag are considered;
        # elements that have none are skipped.
        direct_strings = tag.find_all(string=True, recursive=False)
        if direct_strings:
            print(direct_strings[0])


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment