Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save jonathanhle/79e339ba25bf50d89d912c40e0ba9ff8 to your computer and use it in GitHub Desktop.
Save jonathanhle/79e339ba25bf50d89d912c40e0ba9ff8 to your computer and use it in GitHub Desktop.
Paginated APIs with Python: Four ways!
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Paginated APIs: Four ways!\n",
"\n",
"1. Knowing how many pages/requests you'll need\n",
"2. Doing what the API tells you\n",
"3. Going until there are no more results\n",
"4. Going until there is an error"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# https://pokeapi.co/api/v2/pokemon?limit=100"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"\n",
"url = \"https://pokeapi.co/api/v2/pokemon?limit=100\"\n",
"response = requests.get(url)\n",
"data = response.json()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data['results']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"len(data['results'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"url = \"https://pokeapi.co/api/v2/pokemon?offset=100&limit=100\"\n",
"response = requests.get(url)\n",
"data = response.json()\n",
"data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"url = \"https://pokeapi.co/api/v2/pokemon?offset=200&limit=100\"\n",
"response = requests.get(url)\n",
"data = response.json()\n",
"data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"url = \"https://pokeapi.co/api/v2/pokemon?offset=1100&limit=100\"\n",
"response = requests.get(url)\n",
"data = response.json()\n",
"data['results']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"url = \"https://pokeapi.co/api/v2/pokemon?offset=1200&limit=100\"\n",
"response = requests.get(url)\n",
"data = response.json()\n",
"data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Method 1: work out how many pages exist, then request each one in turn.\n",
"# First page: https://pokeapi.co/api/v2/pokemon?offset=0&limit=100\n",
"\n",
"limit = 100\n",
"pokemon = []\n",
"\n",
"# Read the total from the API's 'count' field rather than hard-coding\n",
"# 12 pages, so newly added pokemon are not silently left out.\n",
"total = requests.get(\"https://pokeapi.co/api/v2/pokemon?limit=1\").json()['count']\n",
"\n",
"# One request per page: offsets 0, limit, 2 * limit, ... below total\n",
"for offset in range(0, total, limit):\n",
"    print(\"----\")\n",
"    url = f\"https://pokeapi.co/api/v2/pokemon?offset={offset}&limit={limit}\"\n",
"    print(\"Requesting\", url)\n",
"\n",
"    response = requests.get(url)\n",
"    data = response.json()\n",
"    pokemon.extend(data['results'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Method: Knowing how many pages we have\n",
"# Pros: Maybe easy to think about\n",
"# Cons: You need to know how many pages there are, not very flexible"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Listening to the API"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Start again from the first page so this cell does not depend on\n",
"# whatever value `url` was left holding by an earlier cell\n",
"url = \"https://pokeapi.co/api/v2/pokemon?limit=100\"\n",
"response = requests.get(url)\n",
"data = response.json()\n",
"\n",
"# The response itself says where the following page lives\n",
"data['next']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"url = data['next']\n",
"response = requests.get(url)\n",
"data = response.json()\n",
"data['next']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Follow the API's own 'next' links, starting from the first page\n",
"pokemon = []\n",
"url = \"https://pokeapi.co/api/v2/pokemon?limit=100\"\n",
"\n",
"# The loop stops as soon as 'next' comes back empty\n",
"while url:\n",
"    print(\"----\")\n",
"    print(\"Requesting\", url)\n",
"\n",
"    # Fetch this page and decode its JSON body\n",
"    response = requests.get(url)\n",
"    data = response.json()\n",
"\n",
"    # Collect this page's pokemon\n",
"    pokemon += data['results']\n",
"\n",
"    # Move on to whichever page the API points at\n",
"    url = data['next']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"len(pokemon)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Method: listening to the API\n",
"# Pros: The API knows what's going on inside of it,\n",
"# good if you don't know how many pages/results there are\n",
"# Cons: The API has to actually tell you the next page"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Going until there are no more results"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"url = \"https://pokeapi.co/api/v2/pokemon?offset=1000&limit=100\"\n",
"response = requests.get(url)\n",
"data = response.json()\n",
"data['results']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"url = \"https://pokeapi.co/api/v2/pokemon?offset=1100&limit=100\"\n",
"response = requests.get(url)\n",
"data = response.json()\n",
"data['results']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"url = \"https://pokeapi.co/api/v2/pokemon?offset=1200&limit=100\"\n",
"response = requests.get(url)\n",
"data = response.json()\n",
"data['results']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Method 3: keep requesting pages until one comes back with no results\n",
"pokemon = []\n",
"offset = 0\n",
"limit = 100\n",
"\n",
"while True:\n",
"    print(\"----\")\n",
"    url = f\"https://pokeapi.co/api/v2/pokemon?offset={offset}&limit={limit}\"\n",
"    print(\"Requesting\", url)\n",
"    response = requests.get(url)\n",
"    data = response.json()\n",
"\n",
"    # An empty page means we have walked past the last pokemon\n",
"    if not data['results']:\n",
"        break\n",
"\n",
"    # Otherwise keep this page's pokemon and advance to the next offset.\n",
"    # Step by `limit` (not a literal 100) so changing the page size\n",
"    # above cannot skip or duplicate results.\n",
"    pokemon.extend(data['results'])\n",
"    offset += limit"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Method: Going until no more results\n",
"# Pros: Very flexible\n",
"# Cons: Extra requests, doesn't really listen to the API, more lines of code"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Waiting until things break"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Method 4: keep requesting pages until processing a page raises an error\n",
"pokemon = []\n",
"offset = 0\n",
"limit = 100\n",
"\n",
"while True:\n",
"    print(\"----\")\n",
"    url = f\"https://pokeapi.co/api/v2/pokemon?offset={offset}&limit={limit}\"\n",
"    print(\"Requesting\", url)\n",
"    response = requests.get(url)\n",
"    data = response.json()\n",
"\n",
"    # Debug by printing out the first pokemon\n",
"    try:\n",
"        print(data['results'][0])\n",
"    except IndexError:\n",
"        # An empty results list has no first item, so we ran out of pages.\n",
"        # Only IndexError is caught: any other failure should surface\n",
"        # instead of being mistaken for \"no more data\".\n",
"        print(\"There was an error! Exiting the loop\")\n",
"        break\n",
"\n",
"    # If we did find pokemon, add them to our list and move on to the\n",
"    # next offset, stepping by `limit` so the page size stays in one place\n",
"    pokemon.extend(data['results'])\n",
"    offset += limit"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Method: Waiting until you hit an error with your processing\n",
"# Pros: Super easy and flexible, good technique for scraping multiple pages\n",
"# Cons: What if there's some OTHER error that isn't a \"you've run out of data\" error?\n",
"# Also looks more complicated"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.2"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment