Created
October 30, 2024 23:15
-
-
Save j2kun/5c042a939d9f1ad26b8e91dd8bf160c6 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse
import datetime
import sys

import requests

from scripts import utils as utils
def get_dois_from_page(page, since_days):
    """Collect the DOIs of recent posts from one page of search results.

    Args:
        page: Decoded JSON search response. It must contain a "hits" list
            whose items each hold a "document" dict with "doi", "url" and
            "published_at" keys; "published_at" is passed to
            ``datetime.datetime.fromtimestamp``, so it must be a POSIX
            timestamp.
        since_days: Maximum age, in whole days, of a post to include.

    Returns:
        A dict mapping ``utils.url_to_filepath(url)`` to the post's DOI.
        Collection stops at the first post older than ``since_days``, so
        the result is only complete when the hits are ordered newest-first.
    """
    dois = {}
    # Loop-invariant: every post on the page is compared to the same "now".
    now = datetime.datetime.now()
    for hit in page["hits"]:
        post = hit["document"]
        doi = post["doi"]
        url = post["url"]
        if not url:
            print(f"Skipping post {doi} because it has no URL. Post: {post}")
            continue

        created_at = datetime.datetime.fromtimestamp(post["published_at"])
        diff_days = (now - created_at).days
        if diff_days > since_days:
            # we already manually handled this webmention with the initial
            # script run
            print(
                f"Skipping post because its publication date ({created_at}) "
                f"is older than the threshold of {since_days} days since "
                f"today ({now}); diff_days={diff_days}."
            )
            # Stop here rather than continue: later hits are not examined.
            return dois

        dois[utils.url_to_filepath(url)] = doi
    return dois
def _fetch_json(url, timeout=30):
    """Fetch ``url`` and return its decoded JSON body.

    Any request failure (connection error, timeout, or non-2xx HTTP status)
    is printed and terminates the script with exit status 1.
    """
    try:
        r = requests.get(url, timeout=timeout)
        # Fail here on HTTP errors instead of on a confusing JSON/KeyError
        # further down.
        r.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(e)
        sys.exit(1)
    return r.json()


def main(blog_slug, since_days=7):
    """Add DOI links to the source files of recently published posts.

    Queries the Rogue Scholar API for the posts of ``blog_slug`` (newest
    first), collects the DOIs of posts at most ``since_days`` days old, and
    rewrites each matching file under the git root with the result of
    ``utils.add_link(lines, "doi", doi)``.

    Args:
        blog_slug: Value sent as the ``blog_slug`` query parameter.
        since_days: Maximum post age, in days, to process.
    """
    per_page = 50
    # https://api.rogue-scholar.org/posts?blog_slug=jeremykun&sort=published_at&order=desc
    search_url = (
        "https://api.rogue-scholar.org/posts?"
        f"blog_slug={blog_slug}"
        f"&sort=published_at&order=desc&per_page={per_page}"
    )

    response = _fetch_json(search_url)
    num_hits = response["found"]
    # Ceiling division: an exact multiple of per_page must not add a page.
    num_pages = (num_hits + per_page - 1) // per_page
    print(f"Found {num_hits} posts across {num_pages} paginated search pages.")

    dois = {}
    for page in range(1, num_pages + 1):
        print(f"Querying page {page}")
        response = _fetch_json(f"{search_url}&page={page}")
        next_dois = get_dois_from_page(response, since_days)
        if not next_dois:
            # Results are sorted newest-first, so an empty page means every
            # remaining post is past the threshold (or unusable).
            print("No more posts to process.")
            break
        dois.update(next_dois)

    print(f"Found {len(dois)} dois to process.")
    git_root = utils.get_git_root()
    for path, doi in dois.items():
        print(f"Processing {path} with DOI {doi}")
        post_path = git_root / path
        with open(post_path, "r", encoding="utf-8") as infile:
            post_lines = infile.readlines()

        output = utils.add_link(post_lines, "doi", doi)
        with open(post_path, "w", encoding="utf-8") as outfile:
            outfile.write(output)
# Command-line entry point: parse the options and run the DOI update.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Add DOI links to recently published blog posts."
    )
    parser.add_argument(
        "-b",
        "--blog_slug",
        default="jeremykun",
        help="blog slug to query on the Rogue Scholar API",
    )
    parser.add_argument(
        "-s",
        "--since_days",
        type=int,
        default=7,
        help="skip posts published more than this many days ago",
    )
    args = parser.parse_args()
    main(args.blog_slug, args.since_days)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment