Created
October 21, 2021 20:41
-
-
Save shmidtelson/15aece812781d1d10046ab338ea59744 to your computer and use it in GitHub Desktop.
Парсим занятия mkr.udau.edu.ua
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Fetch a group's timetable from mkr.udau.edu.ua and pretty-print it.

Flow:
1. GET the timetable page and read the CSRF parameter name and token from
   its ``<meta>`` tags.
2. POST the timetable form, including the CSRF pair, using the same session.
3. Find the inline ``<script>`` that contains an ``"events": [...]`` array
   and decode that array as JSON.
"""
import json
import re
from pprint import pprint

import requests
from bs4 import BeautifulSoup

url = 'http://mkr.udau.edu.ua/time-table/group?type=1'
REQUEST_TIMEOUT = 30  # seconds; without a timeout requests may block forever

session = requests.Session()

# The first request provides the CSRF pair; the same session is reused for
# the POST below.
first_entry_response = session.get(url, timeout=REQUEST_TIMEOUT)
first_entry_response.raise_for_status()
soup = BeautifulSoup(first_entry_response.text, 'html.parser')

csrf_param_tag = soup.find("meta", attrs={"name": "csrf-param"})
csrf_token_tag = soup.find("meta", attrs={"name": "csrf-token"})
if csrf_param_tag is None or csrf_token_tag is None:
    raise RuntimeError("csrf-param / csrf-token meta tags not found on the page")
csrf_param = csrf_param_tag['content']
csrf_token = csrf_token_tag['content']
print(f'Получили csrf_param: {csrf_param} и csrf_token: {csrf_token}')

# Parameters for getting the data. The payload must be built only after the
# CSRF values are known: it uses them as a key/value pair.
data = {
    csrf_param: csrf_token,
    "TimeTableForm[facultyId]": "5",
    "TimeTableForm[course]": "2",
    "TimeTableForm[groupId]": "1007",
    "date-picker": "01.10.2020 - 31.10.2022",
    "TimeTableForm[dateStart]": "01.10.2020",
    "TimeTableForm[dateEnd]": "31.10.2022",
    "TimeTableForm[indicationDays]": "5",
}

table_page = session.post(url, data=data, timeout=REQUEST_TIMEOUT)
table_page.raise_for_status()
soup = BeautifulSoup(table_page.text, 'html.parser')

# Look for the script that actually carries the events instead of trusting a
# fixed position. Scripts are scanned last-to-first so the last script is
# still preferred when it holds the data.
events_marker = re.compile(r'"events":\s*\[')
decoder = json.JSONDecoder()
for script_tag in reversed(soup.find_all("script")):
    script_text = script_tag.text
    marker = events_marker.search(script_text)
    if marker is None:
        continue
    # Decode exactly one JSON value starting at the opening bracket; a greedy
    # regex would also swallow any later "]" on the same line.
    result, _ = decoder.raw_decode(script_text, marker.end() - 1)
    break
else:
    raise RuntimeError('no <script> containing an "events" array was found')

pprint(result)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment