Skip to content

Instantly share code, notes, and snippets.

@j2kun
Created October 30, 2024 23:15
Show Gist options
  • Save j2kun/5c042a939d9f1ad26b8e91dd8bf160c6 to your computer and use it in GitHub Desktop.
Save j2kun/5c042a939d9f1ad26b8e91dd8bf160c6 to your computer and use it in GitHub Desktop.
import argparse
import datetime
import requests
import sys
from scripts import utils as utils
def get_dois_from_page(page, since_days):
    """Collect the DOIs of recently published posts from one search page.

    Args:
        page: Decoded JSON of one search response. It must contain a
            "hits" list whose entries each hold a "document" dict with
            "doi", "url" and "published_at" keys. "published_at" is fed to
            ``datetime.datetime.fromtimestamp``, so it is expected to be a
            Unix timestamp.
        since_days: Maximum age of a post, in whole days, for it to be
            included.

    Returns:
        A dict mapping ``utils.url_to_filepath(url)`` to the post's DOI.
        Posts with an empty URL are skipped. Scanning stops at the first
        post older than ``since_days``, so only the posts seen before it
        are returned (an empty dict if the first post is already too old).
    """
    # Take "now" once so every post on the page is compared against the
    # same instant instead of a fresh clock reading per iteration.
    now = datetime.datetime.now()
    dois = {}
    for hit in page["hits"]:
        post = hit["document"]
        doi = post["doi"]
        url = post["url"]
        if not url:
            print(f"Skipping post {doi} because it has no URL. Post: {post}")
            continue

        created_at = datetime.datetime.fromtimestamp(post["published_at"])
        diff_days = (now - created_at).days
        if diff_days > since_days:
            # we already manually handled this webmention with the initial
            # script run
            # NOTE(review): returning here (rather than continuing) relies
            # on the posts arriving newest-first, as main() requests with
            # sort=published_at&order=desc.
            print(
                f"Skipping post because its publication date ({created_at}) "
                f"is older than the threshold of {since_days} days since "
                f"today ({now}); diff_days={diff_days}."
            )
            return dois

        dois[utils.url_to_filepath(url)] = doi
    return dois
def _fetch_json(url, params):
    """GET ``url`` with query ``params`` and return the decoded JSON body.

    Prints the error and exits the process with status 1 if the request
    fails, times out, or returns an HTTP error status.
    """
    try:
        # requests applies no timeout by default; without one a stalled
        # connection would hang the script indefinitely.
        r = requests.get(url, params=params, timeout=30)
        # Turn 4xx/5xx responses into an exception here instead of letting
        # an error payload fail later with an unrelated KeyError.
        r.raise_for_status()
        return r.json()
    except requests.exceptions.RequestException as e:
        print(e)
        sys.exit(1)


def main(blog_slug, since_days=7):
    """Add DOI links to the local source files of recently published posts.

    Queries the Rogue Scholar API for the posts of ``blog_slug`` (newest
    first), collects the DOIs of posts at most ``since_days`` days old via
    ``get_dois_from_page``, and rewrites each matching file under the git
    root with the result of ``utils.add_link(lines, "doi", doi)``.

    Args:
        blog_slug: Blog identifier passed as the ``blog_slug`` query
            parameter.
        since_days: Maximum post age in days; defaults to 7.

    Exits with status 1 if any API request fails.
    """
    per_page = 50
    # https://api.rogue-scholar.org/posts?blog_slug=jeremykun&sort=published_at&order=desc
    search_url = "https://api.rogue-scholar.org/posts"
    # Passing the query as params lets requests URL-encode blog_slug.
    query = {
        "blog_slug": blog_slug,
        "sort": "published_at",
        "order": "desc",
        "per_page": per_page,
    }

    response = _fetch_json(search_url, query)
    num_hits = response["found"]
    # Ceiling division: an exact multiple of per_page must not add a
    # trailing empty page.
    num_pages = (num_hits + per_page - 1) // per_page
    print(f"Found {num_hits} posts across {num_pages} paginated search pages.")

    dois = {}
    for page in range(1, num_pages + 1):
        print(f"Querying page {page}")
        response = _fetch_json(search_url, {**query, "page": page})
        next_dois = get_dois_from_page(response, since_days)
        if not next_dois:
            # An empty result means this page yielded nothing recent, so
            # the remaining (older) pages are not queried.
            print("No more posts to process.")
            break
        dois.update(next_dois)
    print(f"Found {len(dois)} dois to process.")

    git_root = utils.get_git_root()
    for path, doi in dois.items():
        print(f"Processing {path} with DOI {doi}")
        post_path = git_root / path
        # Explicit UTF-8 so the rewrite does not depend on the platform's
        # locale encoding.
        with open(post_path, "r", encoding="utf-8") as infile:
            post_lines = infile.readlines()
        output = utils.add_link(post_lines, "doi", doi)
        with open(post_path, "w", encoding="utf-8") as outfile:
            outfile.write(output)
if __name__ == "__main__":
    # Command-line entry point: read the blog slug and the age threshold
    # (in days) from the arguments and hand them to main().
    cli = argparse.ArgumentParser()
    cli.add_argument("-b", "--blog_slug", default="jeremykun")
    cli.add_argument("-s", "--since_days", type=int, default=7)
    options = cli.parse_args()
    main(blog_slug=options.blog_slug, since_days=options.since_days)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment