Skip to content

Instantly share code, notes, and snippets.

@edsu
Last active November 10, 2025 13:04
Show Gist options
  • Select an option

  • Save edsu/3a30bb66cf15165a5e7078d22c7a3082 to your computer and use it in GitHub Desktop.

Select an option

Save edsu/3a30bb66cf15165a5e7078d22c7a3082 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
#
# This program will introspect on an OpenAlex API filter call and try to
# determine what record in the cursored result set is causing a problem.
#
# ./openalexbug.py 'https://api.openalex.org/works?filter=author.id:https://openalex.org/A5003671931&cursor=&per-page=200'
# problem record: https://openalex.org/W3200281942
# 121 records
#
# /// script
# dependencies = ["requests"]
# ///
#
import sys
import time
from urllib.parse import urlparse, parse_qs
import requests
API_HOST = 'api.openalex.org'
MAX_PAGE_SIZE = 200


def build_request(api_url):
    """Split an OpenAlex API URL into an endpoint URL and query parameters.

    The returned parameters always start a fresh cursored crawl: ``cursor``
    is set to ``'*'`` and ``per-page`` to ``MAX_PAGE_SIZE``, replacing
    whatever values the caller's URL carried.

    Raises:
        ValueError: if the URL's host is not the OpenAlex API host.
    """
    parsed = urlparse(api_url)
    if parsed.netloc != API_HOST:
        raise ValueError(f"{api_url} isn't an OpenAlex API URL")

    # The parsed path already begins with "/", so strip it before joining to
    # avoid requesting "https://api.openalex.org//works".
    url = f"https://{API_HOST}/{parsed.path.lstrip('/')}"
    params = parse_qs(parsed.query)
    params['cursor'] = '*'
    params['per-page'] = MAX_PAGE_SIZE
    return url, params


def halve_page_size(page_size):
    """Return half of ``page_size`` (rounded down), never going below 1."""
    return max(1, page_size // 2)


def find_problem_records(url, params):
    """Walk the cursored result set, printing the ID of every record that
    makes the API answer with a 500.

    ``params`` is updated in place with the current cursor and page size.
    Returns the total number of records seen, problem records included.

    Raises:
        requests.HTTPError: on any error response other than the 500s that
            are used to narrow down a problem record.
    """
    record_count = 0
    page_size = MAX_PAGE_SIZE

    while True:
        time.sleep(1)
        params['per-page'] = page_size
        resp = requests.get(url, params=params)

        if resp.status_code == 500:
            if page_size > 1:
                # cut the page size in half to zero in on the problematic
                # record, retrying from the same cursor
                page_size = halve_page_size(page_size)
                continue

            # A one-record page still fails, so the record at the cursor is
            # the culprit. Ask for just its ID instead of the full record so
            # we can name it and advance the cursor past it. A copy of the
            # params is used so a caller-supplied "select" is not lost.
            time.sleep(1)
            resp = requests.get(url, params={**params, 'select': 'id'})
            # this shouldn't fail when just getting the id?
            resp.raise_for_status()
            results = resp.json()
            if results['results']:
                print(f"problem record: {results['results'][0]['id']}")
        else:
            # Anything else that isn't a success (rate limiting, bad filter,
            # ...) is not something page-size narrowing can fix.
            resp.raise_for_status()
            results = resp.json()

        record_count += len(results['results'])

        # The page at this cursor went through, so go back to full pages.
        page_size = MAX_PAGE_SIZE
        params['cursor'] = results['meta'].get('next_cursor')

        # if there's no cursor we're done!
        if not params['cursor']:
            return record_count


def main():
    """Command-line entry point: ``openalexbug.py <API URL>``."""
    if len(sys.argv) != 2:
        sys.exit("usage: openalexbug.py <API URL>")

    try:
        url, params = build_request(sys.argv[1])
    except ValueError as err:
        sys.exit(str(err))

    record_count = find_problem_records(url, params)
    print(f"{record_count} records")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment