Created
March 29, 2024 16:07
-
-
Save Heinrich-XIAO/cacf0f09a76fb1e03a0bb186bf2ce7bc to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Scrapes Duolingo for all users
from datetime import datetime, timezone

import requests
def date(epoch_time: float) -> str:
    """Format a Unix timestamp as a UTC ``dd/mm/yy HH:MM:SS`` string.

    Args:
        epoch_time: Seconds since the Unix epoch.

    Returns:
        The timestamp rendered in UTC, e.g. ``'01/01/70 00:00:00'`` for 0.
    """
    # Build a timezone-aware datetime so the result never depends on the
    # local timezone of the machine running the script.
    date_obj = datetime.fromtimestamp(epoch_time, timezone.utc)
    # Format the datetime object to dd/mm/yy hh:mm:ss format
    return date_obj.strftime('%d/%m/%y %H:%M:%S')
# Walk user IDs upwards from 1, fetch each profile, append the raw record to
# output.txt and print a short human-readable summary.

# The request headers never change, so build them once instead of per request.
headers = {'User-Agent': 'Mozilla/5.0 (X11; Windows x86_64; rv:120.0) Gecko/20100101 Firefox/120.0',}

for i in range(1, 1000000000000000000):
    res = requests.get(f'https://www.duolingo.com/2017-06-30/users/{i}?fields=courses,creationDate,fromLanguage,gemsConfig,globalAmbassadorStatus,hasPlus,id,learningLanguage,lingots,name,picture,roles,streak,streakData{{currentStreak,previousStreak}},subscriberLevel,totalXp,username', headers=headers)

    # A body starting with "404" is treated as "no such user"; skip it
    # before attempting to parse JSON.
    if res.text.startswith('404'):
        continue

    # Named `user` rather than `json` to avoid shadowing the stdlib module name.
    user = res.json()
    if user == {}:
        continue

    # Open per record with a context manager so the handle is always closed
    # (the original leaked one handle per user). One record per line keeps
    # the output parseable; UTF-8 avoids encode errors on non-ASCII names.
    with open("output.txt", "a", encoding="utf-8") as f:
        f.write(str(user) + "\n")

    print(f'User Id: {user["id"]}')
    print(f'Username: {user["username"]}')
    print(f'Speaks: {user["fromLanguage"]}')
    print(f'Streak: {user["streak"]}')
    print(f'Account Creation Date: {date(user["creationDate"])}')
    print(f'Total XP: {user["totalXp"]}')
    if 'name' in user:
        print(f'Name: {user["name"]}')

    print('Learning:')
    # Print each course title once, in first-seen order; a set gives O(1)
    # membership checks.
    seen_titles = set()
    for course in user["courses"]:
        title = course['title']
        if title not in seen_titles:
            print(f'    {title}')
            seen_titles.add(title)
    # Blank line between users.
    print()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment