Skip to content

Instantly share code, notes, and snippets.

@simin75simin
Last active August 4, 2024 10:04
Show Gist options
  • Save simin75simin/06d13d1fad9bf11b56278ff4b5b75f7e to your computer and use it in GitHub Desktop.
Save simin75simin/06d13d1fad9bf11b56278ff4b5b75f7e to your computer and use it in GitHub Desktop.
PE-oeis
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"<>:2: SyntaxWarning: invalid escape sequence '\\s'\n",
"<>:2: SyntaxWarning: invalid escape sequence '\\s'\n",
"C:\\Users\\Administrator\\AppData\\Local\\Temp\\ipykernel_9396\\1673588957.py:2: SyntaxWarning: invalid escape sequence '\\s'\n",
" with open('data\\solved_problem_ids.txt','r') as f:\n"
]
},
{
"data": {
"text/plain": [
"(151, ['1', '2', '3', '6', '5'])"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"# with open('data\\solved_problem_ids.txt','r') as f:\n",
"# solved_problem_ids = f.read().splitlines()\n",
"# solved_problem_ids = [x for x in solved_problem_ids if x.isnumeric()]\n",
"# len(solved_problem_ids), solved_problem_ids[:5]"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "e1b8d517af9f46a78a6b90cbd2277271",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/2 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# # crawl solved solutions... just don't post them publicly...\n",
"# cookies = {'PHPSESSID': 'YOUR_PHPSE'}\n",
"# for i in tqdm(solved_problem_ids[-2:]):\n",
"# attempt=0\n",
"# while attempt<5:\n",
"# try:\n",
"# u=f'https://projecteuler.net/thread={i};page=1'\n",
"# r=requests.get(u, cookies=cookies)\n",
"# if r.status_code != 200:\n",
"# print(f'Error: {r.status_code} {u}')\n",
"# continue\n",
"# idx=r.text.rfind('page=')\n",
"# if idx==-1:\n",
"# pageno=1\n",
"# else:\n",
"# pageno=-1\n",
"# for j in range(idx+5,idx+10):\n",
"# if not r.text[j].isnumeric():\n",
"# pageno=int(r.text[idx+5:j])\n",
"# break\n",
"# for p in range(1,pageno+1):\n",
"# u=f'https://projecteuler.net/thread={i};page={str(p)}'\n",
"# r=requests.get(u, cookies=cookies)\n",
"# if r.status_code != 200:\n",
"# print(f'Error: {r.status_code} {u}')\n",
"# continue\n",
"# with open(f'results/problem_{i}_page_{p}.html','w') as f:\n",
"# f.write(r.text)\n",
"# break\n",
"# except Exception as e:\n",
"# attempt+=1\n",
"# print(f'Error: {e} {u}')\n",
"# continue"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Problem statement crawler"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Crawl problem statements: save projecteuler.net/minimal=<id> for every solved problem.\n",
"# `solved_problem_ids` and `cookies` must already exist in the kernel; the cells that\n",
"# define them are commented out in this notebook.\n",
"import os\n",
"\n",
"import requests\n",
"from tqdm.notebook import tqdm\n",
"\n",
"# Make sure the output directory exists so the writes below cannot fail on a fresh checkout.\n",
"os.makedirs('problem_statements_html', exist_ok=True)\n",
"\n",
"for i in tqdm(solved_problem_ids):\n",
"    attempt=0\n",
"    while attempt<5:\n",
"        # Build the URL before the try block so the except handler can always print it.\n",
"        u=f'https://projecteuler.net/minimal={i}'\n",
"        try:\n",
"            r=requests.get(u, cookies=cookies, timeout=30)\n",
"            # Treat HTTP errors as failures so an error page is retried instead of saved.\n",
"            r.raise_for_status()\n",
"            with open(f'problem_statements_html/problem_{i}.html','w') as f:\n",
"                f.write(r.text)\n",
"            break\n",
"        except Exception as e:\n",
"            attempt+=1\n",
"            print(f'Error: {e} {u}')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# OEIS check: search OEIS for the large integers found in each problem statement"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"from bs4 import BeautifulSoup\n",
"import re\n",
"import time\n",
"from numpy.random import choice\n",
"import random\n",
"\n",
"def extract_big_integers(input_string):\n",
" # Regular expression to find integers with more than 6 digits\n",
" pattern = r'\\b\\d{7,}\\b'\n",
" \n",
" # Find all matches in the input string\n",
" matches = re.findall(pattern, input_string)\n",
" \n",
" # Convert matches to integers\n",
" big_integers = [int(match) for match in matches]\n",
" \n",
" return big_integers\n",
"\n",
"def get_oeis(problem):\n",
"    \"\"\"Look up the big integers of one problem statement on OEIS and save any hits.\n",
"\n",
"    Reads ``problem_statements_html/problem_{problem}.html``, extracts the\n",
"    integers of 7+ digits with ``extract_big_integers``, queries the OEIS\n",
"    search page for at most 5 distinct ones, and writes the concatenated HTML\n",
"    of every response that did not report a failed match to\n",
"    ``oeis/oeis_{problem}.html``. Nothing is written when no query matched.\n",
"\n",
"    Raises whatever ``open`` or ``requests`` raise (missing statement file,\n",
"    network error, timeout, non-2xx response); retrying is left to the caller.\n",
"    \"\"\"\n",
"    import os  # local import: only needed to create the output directory\n",
"\n",
"    # Forward slash instead of a backslash: portable and not an invalid escape sequence.\n",
"    path=f'problem_statements_html/problem_{problem}.html'\n",
"\n",
"    with open(path, 'r',encoding='cp1252') as file:\n",
"        input_string = file.read()\n",
"\n",
"    # Drop repeated numbers (keeping first-seen order) so no query is sent twice.\n",
"    big_integers = list(dict.fromkeys(extract_big_integers(input_string)))\n",
"    if len(big_integers)>5:\n",
"        # random.sample draws without replacement and keeps plain Python ints;\n",
"        # numpy's choice samples with replacement by default.\n",
"        big_integers=random.sample(big_integers, 5)\n",
"\n",
"    result=''\n",
"    for i in big_integers:\n",
"        url=f'https://oeis.org/search?q={i}&go=Search'\n",
"        response = requests.get(url, timeout=30)\n",
"        # Surface HTTP errors so an error page is never stored as a search result.\n",
"        response.raise_for_status()\n",
"        html = response.text\n",
"        # Pause after every request, including no-match ones, to throttle the crawl.\n",
"        time.sleep(random.uniform(0.5, 1.5))\n",
"        if \"Sorry, but the terms do not match anything in the table.\" in html:\n",
"            continue\n",
"        if not html:\n",
"            print('NOTHING')\n",
"            time.sleep(5)\n",
"        result+=html\n",
"\n",
"    if not result:\n",
"        return\n",
"\n",
"    # Create the output directory on first use so the write cannot fail on a fresh checkout.\n",
"    os.makedirs('oeis', exist_ok=True)\n",
"    with open(f'oeis/oeis_{problem}.html', 'w',encoding='utf-8') as file:\n",
"        file.write(result)\n",
"\n",
"from tqdm.notebook import tqdm\n",
"\n",
"# Run the OEIS lookup for problems 101-900, allowing up to 5 tries per problem.\n",
"for i in tqdm(range(101, 901)):\n",
"    attempts=0\n",
"    while attempts<5:\n",
"        try:\n",
"            get_oeis(i)\n",
"            break\n",
"        except FileNotFoundError as e:\n",
"            # A missing file or directory will not appear on retry, so give up on this problem at once.\n",
"            print(f\"Error in problem {i}: {e}, skipping\")\n",
"            break\n",
"        except Exception as e:\n",
"            attempts+=1\n",
"            print(f\"Error in problem {i}: {e}, attempt {attempts}\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment