Created
November 30, 2022 14:36
-
-
Save andfanilo/a82cc70910e3a3dd7279055ec89a3e7d to your computer and use it in GitHub Desktop.
Scrape Components Tracker wiki
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Install dependencies: pip install beautifulsoup4 ghapi requests streamlit
Add your GitHub API token as `key` under a `[github]` section in .streamlit/secrets.toml
Run the app: streamlit run streamlit_app.py
""" | |
from typing import Tuple | |
import pandas as pd | |
import requests | |
import streamlit as st | |
from bs4 import BeautifulSoup | |
from bs4.element import ResultSet | |
from ghapi.all import GhApi | |
from ghapi.all import paged | |
@st.experimental_singleton
def load_api():
    """Build the GitHub API client from the token stored in Streamlit secrets.

    The token is read from the ``key`` entry of the ``github`` section of
    ``st.secrets``. The function is wrapped in ``st.experimental_singleton``.
    """
    return GhApi(token=st.secrets["github"]["key"])
def check_rate_limit():
    """Return the result of the GitHub client's ``rate_limit.get()`` call."""
    api = load_api()
    return api.rate_limit.get()
def scrape_tracker_wiki() -> ResultSet:
    """Collect the "GitHub" anchors from the components tracker forum post.

    Downloads the JSON form of the tracker topic, takes the rendered HTML
    (``cooked``) of its first post, and searches the element that directly
    follows the first ``<h3>`` heading for ``<a>`` tags whose text is exactly
    "GitHub".

    Returns:
        The matching anchor tags as a BeautifulSoup ``ResultSet``.

    Raises:
        requests.HTTPError: If the forum does not answer with HTTP 200.
        ValueError: If the post has no ``<h3>`` heading or nothing follows it.
    """
    url = "https://discuss.streamlit.io/t/streamlit-components-community-tracker/4634.json"
    # A timeout keeps a stalled forum request from hanging the app forever.
    resp = requests.get(url, timeout=30)
    # Raise explicitly rather than `assert`, which is stripped under `python -O`.
    if resp.status_code != 200:
        raise requests.HTTPError(
            f"Unexpected status {resp.status_code} when fetching {url}",
            response=resp,
        )

    data = resp.json()["post_stream"]["posts"][0]["cooked"]
    soup = BeautifulSoup(f"<html>{data}</html>", "html.parser")

    heading = soup.find("h3")
    published_soup = heading.find_next_sibling() if heading is not None else None
    if published_soup is None:
        # Fail with a clear message instead of an AttributeError on None
        # should the layout of the forum post change.
        raise ValueError("Could not locate the section following the first <h3> heading")

    # `string=` is the current name of the deprecated `text=` filter.
    return published_soup.find_all("a", string="GitHub")
def extract_info_from_link(soup_link: str) -> Tuple[str, str]:
    """Split a repository link into its last two path components.

    Surrounding whitespace and trailing slashes are dropped first, so
    ``https://github.com/user/repo/`` yields ``("user", "repo")`` rather than
    ``("repo", "")``.

    Args:
        soup_link: Link text whose last two "/"-separated parts are the
            owner and the repository name.

    Returns:
        A ``(user, repo)`` tuple.

    Raises:
        IndexError: If the link contains no "/" separator.
    """
    parts = soup_link.strip().rstrip("/").split("/")
    user, repo = parts[-2], parts[-1]
    return (user, repo)
@st.experimental_memo
def count_stargazers(link: str) -> Tuple[str, int]:
    """Return the ``"user/repo"`` label and star count for a repository link.

    The count is read from the ``stargazers_count`` field of the repository
    resource, which takes a single API request. Paging through the full
    stargazer list only to measure its length costs one request per page
    and drains the rate limit quickly on popular repositories.

    Args:
        link: Repository link; its last two path components are used as the
            owner and repository name.

    Returns:
        A ``(label, count)`` tuple where ``label`` is ``"user/repo"``.
    """
    username, repository = extract_info_from_link(link)
    api = load_api()
    repo_info = api.repos.get(username, repository)
    return (f"{username}/{repository}", int(repo_info["stargazers_count"]))
def main():
    """Show a table of star counts for every repository linked in the tracker."""
    rows = []
    for anchor in scrape_tracker_wiki():
        rows.append(count_stargazers(anchor["href"]))
    table = pd.DataFrame(rows, columns=["repo", "count"])
    st.dataframe(table)
if __name__ == "__main__":
    # Page-level configuration, then the page title.
    st.set_page_config(page_title="Components Tracker", page_icon=":book:")
    st.title("All the Components")

    # Sidebar: display the GitHub API rate-limit status.
    with st.sidebar:
        st.header("Configuration")
        rate_limit = check_rate_limit()
        st.write(rate_limit)

    # Main area: the table of components and their star counts.
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment