Skip to content

Instantly share code, notes, and snippets.

@Xnuvers007
Last active August 5, 2024 21:58
Show Gist options
  • Save Xnuvers007/9f70f1b2158f23f918a3c8a184db1bdc to your computer and use it in GitHub Desktop.
Save Xnuvers007/9f70f1b2158f23f918a3c8a184db1bdc to your computer and use it in GitHub Desktop.
Scrape the anime list from https://nimegami.id/
import requests
from bs4 import BeautifulSoup
import time
# Request headers sent with every fetch_page call: a browser-style Accept
# value and a desktop Chrome user-agent string.
headers = {
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36',
}
def fetch_page(url, max_attempts=5, retry_delay=3):
    """Download *url* and return the successful ``requests`` response.

    The request is sent with the module-level ``headers``, a 30-second
    timeout and TLS verification enabled. Any ``requests.RequestException``
    (including the HTTP error raised by ``raise_for_status``) is reported on
    stdout and the request is retried after ``retry_delay`` seconds.

    Args:
        url: Address to fetch.
        max_attempts: Total number of tries before giving up. Pass ``None``
            to retry forever, which is what this function used to do
            unconditionally.
        retry_delay: Seconds to wait between two tries.

    Returns:
        The response object whose status passed ``raise_for_status``.

    Raises:
        requests.RequestException: The error of the final try, once
            ``max_attempts`` tries have all failed.
    """
    attempt = 0
    while True:
        attempt += 1
        try:
            response = requests.get(url, headers=headers, timeout=30, verify=True)
            response.raise_for_status()
            return response
        except requests.RequestException as e:
            # A permanent failure (e.g. a 404) would otherwise loop forever,
            # so stop after the configured number of tries and let the
            # caller see the real error.
            if max_attempts is not None and attempt >= max_attempts:
                print(f"Error fetching {url}: {e}. Giving up after {attempt} attempts.")
                raise
            print(f"Error fetching {url}: {e}. Retrying in {retry_delay} seconds.")
            time.sleep(retry_delay)
def extract_anime_data(page_url):
    """Scrape one listing page and return its anime entries.

    The page is fetched with ``fetch_page`` and parsed with BeautifulSoup's
    ``html.parser``. Entries are the ``<li>`` elements inside the
    ``div.animelist`` container; an entry is kept only when it has both an
    ``<a>`` element and a ``span.color_anilist`` element.

    Args:
        page_url: URL of the listing page to scrape.

    Returns:
        A list of dicts with the keys ``'title'`` (anchor text), ``'status'``
        (span text) and ``'link'`` (the anchor's ``href``). The list is empty
        when the page has no ``div.animelist`` container.
    """
    response = fetch_page(page_url)
    soup = BeautifulSoup(response.content, 'html.parser')
    daftar_anime = soup.find('div', class_='animelist')
    if daftar_anime is None:
        # soup.find returns None when the container is absent; without this
        # guard the .find_all call below raises AttributeError.
        return []

    anime_list = []
    for anime in daftar_anime.find_all('li'):
        title_element = anime.find('a')
        status_element = anime.find('span', class_='color_anilist')
        if title_element and status_element:
            title = title_element.get_text(strip=True)
            status = status_element.get_text(strip=True)
            link = title_element['href']
            anime_list.append({'title': title, 'status': status, 'link': link})
    return anime_list
# Collect the entries of listing pages 1 through 7 into one list.
all_anime_data = []
for page_num in range(1, 8):
    page_url = f'https://nimegami.id/anime-list/page/{page_num}/'
    anime_data = extract_anime_data(page_url)
    all_anime_data.extend(anime_data)
    # Pause between consecutive page requests only; sleeping after the final
    # page would just delay the output below.
    if page_num < 7:
        time.sleep(2)

# Print one line per collected entry.
for anime in all_anime_data:
    print(f"Title: {anime['title']}, Status: {anime['status']}, Link: {anime['link']}")
from flask import Flask, jsonify, request
from bs4 import BeautifulSoup
import requests, re, time
# Flask application object; the route and error-handler decorators in this
# file register their view functions on it.
app = Flask(__name__)
# Request headers sent with every fetch_page call: a browser-style Accept
# value and a desktop Chrome user-agent string.
headers = {
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36',
}
def fetch_page(url, max_attempts=5, retry_delay=3):
    """Download *url* and return the successful ``requests`` response.

    The request is sent with the module-level ``headers`` and a 10-second
    timeout. Any ``requests.RequestException`` (including the HTTP error
    raised by ``raise_for_status``) is reported on stdout and the request is
    retried after ``retry_delay`` seconds.

    Args:
        url: Address to fetch.
        max_attempts: Total number of tries before giving up. Pass ``None``
            to retry forever, which is what this function used to do
            unconditionally.
        retry_delay: Seconds to wait between two tries.

    Returns:
        The response object whose status passed ``raise_for_status``.

    Raises:
        requests.RequestException: The error of the final try, once
            ``max_attempts`` tries have all failed.
    """
    attempt = 0
    while True:
        attempt += 1
        try:
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()
            return response
        except requests.RequestException as e:
            # This runs inside request handlers: an endless retry on a
            # permanent failure would block the worker forever, so stop
            # after the configured number of tries and surface the error.
            if max_attempts is not None and attempt >= max_attempts:
                print(f"Error fetching {url}: {e}. Giving up after {attempt} attempts.")
                raise
            print(f"Error fetching {url}: {e}. Retrying in {retry_delay} seconds.")
            time.sleep(retry_delay)
def extract_anime_data(page_url, status_filter=None):
    """Scrape one listing page and return its (optionally filtered) entries.

    The page is fetched with ``fetch_page`` and parsed with BeautifulSoup's
    ``html.parser``. Entries are the ``<li>`` elements inside the
    ``div.animelist`` container; an entry is considered only when it has both
    an ``<a>`` element and a ``span.color_anilist`` element.

    Args:
        page_url: URL of the listing page to scrape.
        status_filter: When not ``None``, keep only entries whose status text
            contains this string, compared case-insensitively.

    Returns:
        A list of dicts with the keys ``'title'``, ``'status'`` and
        ``'link'``; empty when the container is not found.
    """
    html = fetch_page(page_url).content
    listing = BeautifulSoup(html, 'html.parser').find('div', class_='animelist')
    if not listing:
        return []

    entries = []
    for item in listing.find_all('li'):
        anchor = item.find('a')
        badge = item.find('span', class_='color_anilist')
        if not (anchor and badge):
            continue

        title = anchor.get_text(strip=True)
        status = badge.get_text(strip=True)
        link = anchor['href']

        # Drop entries whose status text does not contain the requested filter.
        if status_filter is not None and status_filter.lower() not in status.lower():
            continue
        entries.append({'title': title, 'status': status, 'link': link})
    return entries
def _collect_all_pages(status_filter=None):
    """Scrape listing pages 1 through 7 and return their entries as one list.

    Args:
        status_filter: Forwarded to ``extract_anime_data``; ``None`` keeps
            every entry.

    Returns:
        The concatenated per-page entry lists, in page order.
    """
    first_page, last_page = 1, 7
    collected = []
    for page_num in range(first_page, last_page + 1):
        page_url = f'https://nimegami.id/anime-list/page/{page_num}/'
        collected.extend(extract_anime_data(page_url, status_filter=status_filter))
        # Pause between consecutive upstream requests only; sleeping after
        # the final page would just delay the HTTP response.
        if page_num < last_page:
            time.sleep(2)
    return collected


@app.route('/listanime', methods=['GET'])
@app.route('/listanime/', methods=['GET'])
def list_anime():
    """Return every entry from all listing pages as a JSON array."""
    return jsonify(_collect_all_pages())


@app.route('/listanime/complete', methods=['GET'])
@app.route('/listanime/complete/', methods=['GET'])
def list_anime_complete():
    """Return the entries whose status text contains 'Complete' as a JSON array."""
    return jsonify(_collect_all_pages(status_filter='Complete'))


@app.route('/listanime/ongoing', methods=['GET'])
@app.route('/listanime/ongoing/', methods=['GET'])
def list_anime_ongoing():
    """Return the entries whose status text contains 'On-Going' as a JSON array."""
    return jsonify(_collect_all_pages(status_filter='On-Going'))
@app.route('/listanime/<int:page_num>', methods=['GET'])
def list_anime_page(page_num):
    """Return the entries of a single listing page as a JSON array.

    Args:
        page_num: Page number taken from the URL; valid values are 1 to 7.

    Returns:
        The JSON entry list for a valid page, otherwise a JSON error body
        with HTTP status 400.
    """
    if 1 <= page_num <= 7:
        page_url = f'https://nimegami.id/anime-list/page/{page_num}/'
        return jsonify(extract_anime_data(page_url))

    # Out-of-range page: explain the valid range and point at working URLs.
    error_body = {
        'message': 'Page number out of range. Please use a page number between 1 and 7.',
        'status': 'error',
        'tips': 'Try using a number between 1 and 7.',
        'allanime': request.host_url + 'listanime',
        'animewithpage': request.host_url + 'listanime/1'
    }
    return jsonify(error_body), 400
@app.errorhandler(404)
def not_found_error(error):
    """Return a JSON 404 body with hints tailored to the requested path.

    Paths at or below ``/listanime/complete``, ``/listanime/ongoing`` or
    ``/listanime`` get a section-specific hint; every other path gets a
    generic one.

    The section checks previously matched only the exact section paths.
    Those are registered routes, so the checks could never succeed inside a
    404 handler. They now match by path prefix, most specific first, so that
    e.g. ``/listanime/abc`` or ``/listanime/complete/9`` receive the intended
    hint. The ``animewithpage`` hint always points at ``/listanime/1``
    because that is the only paginated route this application registers.

    Args:
        error: The 404 exception passed in by Flask; its string form is
            echoed in the ``status`` field.

    Returns:
        A ``(response, 404)`` tuple whose response is the JSON hint body.
    """
    requested_url = request.path
    base = request.host_url

    # (path prefix, hint text) pairs; the longer prefixes must come before
    # '/listanime', which would otherwise claim their paths too.
    sections = (
        ('/listanime/complete',
         'Visit the complete anime page to see the list of available pages.'),
        ('/listanime/ongoing',
         'Visit the ongoing anime page to see the list of available pages.'),
        ('/listanime',
         'Visit the main page to see the list of available pages.'),
    )
    for prefix, tips in sections:
        # Match the section itself or anything below it, but not unrelated
        # paths that merely share the leading characters.
        if requested_url == prefix or requested_url.startswith(prefix + '/'):
            return jsonify({
                'message': 'The requested page does not exist. Please use a valid page number.',
                'status': str(error),
                'tips': tips,
                'allanime': base + prefix[1:],
                'animewithpage': base + 'listanime/1'
            }), 404

    return jsonify({
        'message': 'The requested resource was not found.',
        'status': str(error),
        'allanime': base + 'listanime',
        'animewithpage': base + 'listanime/1',
        'tips': 'Check the URL and try again.'
    }), 404
# Start the Flask server (debug mode off) only when this file is executed
# directly rather than imported.
if __name__ == '__main__':
    app.run(debug=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment