Forked from jsoma/Using paginated APIs (4 ways!).ipynb
Created
April 26, 2023 08:14
-
-
Save jonathanhle/79e339ba25bf50d89d912c40e0ba9ff8 to your computer and use it in GitHub Desktop.
Paginated APIs with Python: Four ways!
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Paginated APIs: Four ways!\n", | |
"\n", | |
"1. Knowing how many pages/requests you'll need\n", | |
"2. Doing what the API tells you\n", | |
"3. Going until there are no more results\n", | |
"4. Going until there is an error" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# https://pokeapi.co/api/v2/pokemon?limit=100" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import requests\n", | |
"\n", | |
"url = \"https://pokeapi.co/api/v2/pokemon?limit=100\"\n", | |
"response = requests.get(url)\n", | |
"data = response.json()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"data['results']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"len(data['results'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"url = \"https://pokeapi.co/api/v2/pokemon?offset=100&limit=100\"\n", | |
"response = requests.get(url)\n", | |
"data = response.json()\n", | |
"data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"url = \"https://pokeapi.co/api/v2/pokemon?offset=200&limit=100\"\n", | |
"response = requests.get(url)\n", | |
"data = response.json()\n", | |
"data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"url = \"https://pokeapi.co/api/v2/pokemon?offset=1100&limit=100\"\n", | |
"response = requests.get(url)\n", | |
"data = response.json()\n", | |
"data['results']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"url = \"https://pokeapi.co/api/v2/pokemon?offset=1200&limit=100\"\n", | |
"response = requests.get(url)\n", | |
"data = response.json()\n", | |
"data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# https://pokeapi.co/api/v2/pokemon?offset=0&limit=100\n", | |
"# https://pokeapi.co/api/v2/pokemon?offset=1100&limit=100\n", | |
"\n", | |
"limit = 100\n", | |
"pokemon = []\n", | |
"\n", | |
"for page_num in range(12):\n", | |
" print(\"----\")\n", | |
" offset = page_num * limit\n", | |
" url = f\"https://pokeapi.co/api/v2/pokemon?offset={offset}&limit={limit}\"\n", | |
" print(\"Requesting\", url)\n", | |
"\n", | |
" response = requests.get(url)\n", | |
" data = response.json()\n", | |
" pokemon.extend(data['results'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Method: Knowing how many pages we have\n", | |
"# Pros: Maybe easy to think about\n", | |
"# Cons: You need to know how many pages there are ahead of time, not very flexible" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Listening to the API" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"response = requests.get(url)\n", | |
"data = response.json()\n", | |
"data['next']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"url = data['next']\n", | |
"response = requests.get(url)\n", | |
"data = response.json()\n", | |
"data['next']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"url = \"https://pokeapi.co/api/v2/pokemon?limit=100\"\n", | |
"pokemon = []\n", | |
"\n", | |
"while url:\n", | |
" print(\"----\")\n", | |
" print(\"Requesting\", url)\n", | |
"\n", | |
" # Make the request\n", | |
" response = requests.get(url)\n", | |
" data = response.json()\n", | |
" \n", | |
" # Grab the new pokemon\n", | |
" pokemon.extend(data['results'])\n", | |
" \n", | |
" # Update the URL\n", | |
" url = data['next']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"len(pokemon)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Method: listening to the API\n", | |
"# Pros: The API knows what's going on inside of it,\n", | |
"# good if you don't know how many pages/results there are\n", | |
"# Cons: The API has to actually tell you the next page" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Going until there are no more results" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"url = \"https://pokeapi.co/api/v2/pokemon?offset=1000&limit=100\"\n", | |
"response = requests.get(url)\n", | |
"data = response.json()\n", | |
"data['results']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"url = \"https://pokeapi.co/api/v2/pokemon?offset=1100&limit=100\"\n", | |
"response = requests.get(url)\n", | |
"data = response.json()\n", | |
"data['results']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"url = \"https://pokeapi.co/api/v2/pokemon?offset=1200&limit=100\"\n", | |
"response = requests.get(url)\n", | |
"data = response.json()\n", | |
"data['results']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"url = \"https://pokeapi.co/api/v2/pokemon?limit=100\"\n", | |
"pokemon = []\n", | |
"offset = 0\n", | |
"limit = 100\n", | |
"\n", | |
"while True:\n", | |
" print(\"----\")\n", | |
" url = f\"https://pokeapi.co/api/v2/pokemon?offset={offset}&limit={limit}\"\n", | |
" print(\"Requesting\", url)\n", | |
" response = requests.get(url)\n", | |
" data = response.json()\n", | |
"\n", | |
" # Did we find any pokemon?\n", | |
" if len(data['results']) == 0:\n", | |
" # If not, exit the loop\n", | |
" break\n", | |
" \n", | |
" # If we did find pokemon, add them\n", | |
" # to our list and then move on to the next offset\n", | |
" pokemon.extend(data['results'])\n", | |
"\n", | |
" offset = offset + 100" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Method: Going until no more results\n", | |
"# Pros: Very flexible\n", | |
"# Cons: Extra requests, doesn't really listen to the API, more lines of code" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Waiting until things break" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"url = \"https://pokeapi.co/api/v2/pokemon?limit=100\"\n", | |
"pokemon = []\n", | |
"offset = 0\n", | |
"limit = 100\n", | |
"\n", | |
"while True:\n", | |
" print(\"----\")\n", | |
" url = f\"https://pokeapi.co/api/v2/pokemon?offset={offset}&limit={limit}\"\n", | |
" print(\"Requesting\", url)\n", | |
" response = requests.get(url)\n", | |
" data = response.json()\n", | |
"\n", | |
" # Debug by printing out the first pokemon\n", | |
" try:\n", | |
" print(data['results'][0]) \n", | |
" except:\n", | |
" # We found an error!!!\n", | |
" print(\"There was an error! Exiting the loop\")\n", | |
" break\n", | |
" \n", | |
" # If we did find pokemon, add them\n", | |
" # to our list and then move on to the next offset\n", | |
" pokemon.extend(data['results'])\n", | |
"\n", | |
" offset = offset + 100" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Method: Waiting until you hit an error with your processing\n", | |
"# Pros: Super easy and flexible, good technique for scraping multiple pages\n", | |
"# Cons: What if there's some OTHER error that isn't a \"you've run out of data\" error?\n", | |
"# Also looks more complicated" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.8.2" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment