Last active
August 5, 2024 21:58
-
-
Save Xnuvers007/9f70f1b2158f23f918a3c8a184db1bdc to your computer and use it in GitHub Desktop.
Scrape the anime list from https://nimegami.id/
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
from bs4 import BeautifulSoup | |
import time | |
# Browser-like request headers (Chrome on Windows user agent) that fetch_page
# passes to requests.get() for every download.
headers = {
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36',
}
def fetch_page(url, max_retries=5, retry_delay=3):
    """Download *url* with the module-level ``headers``, retrying on failure.

    The previous implementation retried forever, so a permanent failure
    (for example an HTTP 404 turned into an exception by
    ``raise_for_status``) would hang the caller indefinitely. The number of
    attempts is now bounded.

    Args:
        url: Address of the page to download.
        max_retries: Maximum number of attempts before giving up. Pass
            ``None`` to retry without limit.
        retry_delay: Seconds to wait between two attempts.

    Returns:
        The ``requests.Response`` of the first successful attempt.

    Raises:
        requests.RequestException: The error of the final attempt, once
            ``max_retries`` attempts have all failed.
    """
    attempt = 0
    while True:
        attempt += 1
        try:
            response = requests.get(url, headers=headers, timeout=30, verify=True)
            # Turn 4xx/5xx answers into exceptions so they are retried too.
            response.raise_for_status()
            return response
        except requests.RequestException as e:
            if max_retries is not None and attempt >= max_retries:
                print(f"Error fetching {url}: {e}. Giving up after {attempt} attempts.")
                raise
            print(f"Error fetching {url}: {e}. Retrying in {retry_delay} seconds.")
            time.sleep(retry_delay)
def extract_anime_data(page_url):
    """Scrape one anime-list page into a list of entry dictionaries.

    Args:
        page_url: URL of the list page to download and parse.

    Returns:
        A list of ``{'title': ..., 'status': ..., 'link': ...}`` dicts, one
        per ``<li>`` that has both an anchor (with an ``href``) and a
        ``span.color_anilist`` status element. The list is empty when the
        page has no ``div.animelist`` container.
    """
    response = fetch_page(page_url)
    soup = BeautifulSoup(response.content, 'html.parser')
    daftar_anime = soup.find('div', class_='animelist')
    anime_list = []
    # find() returns None when the container is absent; calling find_all()
    # on it used to raise AttributeError.
    if daftar_anime is None:
        return anime_list
    for anime in daftar_anime.find_all('li'):
        title_element = anime.find('a')
        status_element = anime.find('span', class_='color_anilist')
        if not (title_element and status_element):
            continue
        # An anchor without href used to raise KeyError; skip it instead.
        link = title_element.get('href')
        if link is None:
            continue
        title = title_element.get_text(strip=True)
        status = status_element.get_text(strip=True)
        anime_list.append({'title': title, 'status': status, 'link': link})
    return anime_list
# Collect the entries of list pages 1 through 7, pausing two seconds after
# each page, then print one line per entry.
all_anime_data = []
for page_num in range(1, 8):
    page_url = f'https://nimegami.id/anime-list/page/{page_num}/'
    all_anime_data += extract_anime_data(page_url)
    time.sleep(2)

for anime in all_anime_data:
    print(f"Title: {anime['title']}, Status: {anime['status']}, Link: {anime['link']}")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from flask import Flask, jsonify, request | |
from bs4 import BeautifulSoup | |
import requests, re, time | |
# Flask application object; the route and error handlers in this file
# register themselves on it.
app = Flask(__name__)
# Browser-like request headers (Chrome on Windows user agent) that fetch_page
# passes to requests.get() for every download.
headers = {
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36',
}
def fetch_page(url, max_retries=5, retry_delay=3):
    """Download *url* with the module-level ``headers``, retrying on failure.

    The previous implementation retried forever, so a permanent failure
    (for example an HTTP 404 turned into an exception by
    ``raise_for_status``) would block the calling request handler
    indefinitely. The number of attempts is now bounded.

    Args:
        url: Address of the page to download.
        max_retries: Maximum number of attempts before giving up. Pass
            ``None`` to retry without limit.
        retry_delay: Seconds to wait between two attempts.

    Returns:
        The ``requests.Response`` of the first successful attempt.

    Raises:
        requests.RequestException: The error of the final attempt, once
            ``max_retries`` attempts have all failed.
    """
    attempt = 0
    while True:
        attempt += 1
        try:
            response = requests.get(url, headers=headers, timeout=10)
            # Turn 4xx/5xx answers into exceptions so they are retried too.
            response.raise_for_status()
            return response
        except requests.RequestException as e:
            if max_retries is not None and attempt >= max_retries:
                print(f"Error fetching {url}: {e}. Giving up after {attempt} attempts.")
                raise
            print(f"Error fetching {url}: {e}. Retrying in {retry_delay} seconds.")
            time.sleep(retry_delay)
def extract_anime_data(page_url, status_filter=None):
    """Scrape one anime-list page into a list of entry dictionaries.

    Args:
        page_url: URL of the list page to download and parse.
        status_filter: Optional text; when given, only entries whose status
            contains it (case-insensitively) are kept.

    Returns:
        A list of ``{'title': ..., 'status': ..., 'link': ...}`` dicts, one
        per ``<li>`` that has both an anchor (with an ``href``) and a
        ``span.color_anilist`` status element and that passes the filter.
        The list is empty when the page has no ``div.animelist`` container.
    """
    response = fetch_page(page_url)
    soup = BeautifulSoup(response.content, 'html.parser')
    daftar_anime = soup.find('div', class_='animelist')
    anime_list = []
    if not daftar_anime:
        return anime_list

    # Lowercase the filter once instead of once per list item.
    wanted = None if status_filter is None else status_filter.lower()

    for anime in daftar_anime.find_all('li'):
        title_element = anime.find('a')
        status_element = anime.find('span', class_='color_anilist')
        if not (title_element and status_element):
            continue
        # An anchor without href used to raise KeyError; skip it instead.
        link = title_element.get('href')
        if link is None:
            continue
        title = title_element.get_text(strip=True)
        status = status_element.get_text(strip=True)
        # Apply status filter if provided
        if wanted is None or wanted in status.lower():
            anime_list.append({'title': title, 'status': status, 'link': link})
    return anime_list
def _collect_all_pages(status_filter=None, last_page=7, delay=2):
    """Scrape list pages 1..last_page and return their combined entries.

    Args:
        status_filter: Forwarded to ``extract_anime_data``; ``None`` keeps
            every entry.
        last_page: Number of the final list page to scrape (inclusive).
        delay: Seconds to pause between two consecutive page requests.

    Returns:
        One flat list with the entry dicts of all scraped pages, in page order.
    """
    collected = []
    for page_num in range(1, last_page + 1):
        page_url = f'https://nimegami.id/anime-list/page/{page_num}/'
        collected.extend(extract_anime_data(page_url, status_filter=status_filter))
        # Pause only between requests; sleeping after the final page would
        # merely delay the HTTP response.
        if page_num < last_page:
            time.sleep(delay)
    return collected


@app.route('/listanime', methods=['GET'])
@app.route('/listanime/', methods=['GET'])
def list_anime():
    """Return the entries of all list pages as a JSON array."""
    return jsonify(_collect_all_pages())


@app.route('/listanime/complete', methods=['GET'])
@app.route('/listanime/complete/', methods=['GET'])
def list_anime_complete():
    """Return the entries whose status contains 'Complete' as a JSON array."""
    return jsonify(_collect_all_pages(status_filter='Complete'))


@app.route('/listanime/ongoing', methods=['GET'])
@app.route('/listanime/ongoing/', methods=['GET'])
def list_anime_ongoing():
    """Return the entries whose status contains 'On-Going' as a JSON array."""
    return jsonify(_collect_all_pages(status_filter='On-Going'))
@app.route('/listanime/<int:page_num>', methods=['GET'])
def list_anime_page(page_num):
    """Return the entries of a single list page as a JSON array.

    Args:
        page_num: Page number taken from the URL.

    Returns:
        The JSON array of entries for pages 1 through 7; for any other
        number, a JSON error object together with HTTP status 400.
    """
    if not 1 <= page_num <= 7:
        base = request.host_url
        error_body = {
            'message': 'Page number out of range. Please use a page number between 1 and 7.',
            'status': 'error',
            'tips': 'Try using a number between 1 and 7.',
            'allanime': base + 'listanime',
            'animewithpage': base + 'listanime/1'
        }
        return jsonify(error_body), 400

    entries = extract_anime_data(f'https://nimegami.id/anime-list/page/{page_num}/')
    return jsonify(entries)
@app.errorhandler(404)
def not_found_error(error):
    """Answer unmatched URLs with a JSON body and section-specific hints.

    The previous version compared the path against ``/listanime``,
    ``/listanime/complete`` and ``/listanime/ongoing`` exactly. Those paths
    are registered routes, so they never reach a 404 handler and the three
    specific answers were unreachable. The path is now matched by prefix,
    so requests such as ``/listanime/abc`` or ``/listanime/complete/3`` get
    the matching hint.

    The ``animewithpage`` hint always points at ``listanime/1``: this file
    registers no paged route under ``complete`` or ``ongoing``, so the
    former ``listanime/complete/1`` and ``listanime/ongoing/1`` links would
    themselves have returned 404.

    Args:
        error: The 404 exception passed in by Flask.

    Returns:
        A ``(json_response, 404)`` tuple.
    """
    requested_url = request.path
    base = request.host_url

    # (path prefix, tips text, listing path); most specific prefix first so
    # '/listanime' does not shadow its sub-sections.
    sections = (
        ('/listanime/complete',
         'Visit the complete anime page to see the list of available pages.',
         'listanime/complete'),
        ('/listanime/ongoing',
         'Visit the ongoing anime page to see the list of available pages.',
         'listanime/ongoing'),
        ('/listanime',
         'Visit the main page to see the list of available pages.',
         'listanime'),
    )
    for prefix, tips, listing in sections:
        # Require a path-segment boundary so '/listanimeX' does not match.
        if requested_url == prefix or requested_url.startswith(prefix + '/'):
            return jsonify({
                'message': 'The requested page does not exist. Please use a valid page number.',
                'status': str(error),
                'tips': tips,
                'allanime': base + listing,
                'animewithpage': base + 'listanime/1'
            }), 404

    return jsonify({
        'message': 'The requested resource was not found.',
        'status': str(error),
        'allanime': base + 'listanime',
        'animewithpage': base + 'listanime/1',
        'tips': 'Check the URL and try again.'
    }), 404
# Start the Flask server (debug mode off) only when this file is executed
# directly, not when it is imported.
if __name__ == '__main__':
    app.run(debug=False)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment