Last active
August 4, 2024 10:04
-
-
Save simin75simin/06d13d1fad9bf11b56278ff4b5b75f7e to your computer and use it in GitHub Desktop.
PE-oeis
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"<>:2: SyntaxWarning: invalid escape sequence '\\s'\n", | |
"<>:2: SyntaxWarning: invalid escape sequence '\\s'\n", | |
"C:\\Users\\Administrator\\AppData\\Local\\Temp\\ipykernel_9396\\1673588957.py:2: SyntaxWarning: invalid escape sequence '\\s'\n", | |
" with open('data\\solved_problem_ids.txt','r') as f:\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"(151, ['1', '2', '3', '6', '5'])" | |
] | |
}, | |
"execution_count": 1, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"import pandas as pd\n", | |
"# with open('data\\solved_problem_ids.txt','r') as f:\n", | |
"# solved_problem_ids = f.read().splitlines()\n", | |
"# solved_problem_ids = [x for x in solved_problem_ids if x.isnumeric()]\n", | |
"# len(solved_problem_ids), solved_problem_ids[:5]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 34, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "e1b8d517af9f46a78a6b90cbd2277271", | |
"version_major": 2, | |
"version_minor": 0 | |
}, | |
"text/plain": [ | |
" 0%| | 0/2 [00:00<?, ?it/s]" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"# # crawl solved solutions... just don't post them publicly...\n", | |
"# cookies = {'PHPSESSID': 'YOUR_PHPSE'}\n", | |
"# for i in tqdm(solved_problem_ids[-2:]):\n", | |
"# attempt=0\n", | |
"# while attempt<5:\n", | |
"# try:\n", | |
"# u=f'https://projecteuler.net/thread={i};page=1'\n", | |
"# r=requests.get(u, cookies=cookies)\n", | |
"# if r.status_code != 200:\n", | |
"# print(f'Error: {r.status_code} {u}')\n", | |
"# continue\n", | |
"# idx=r.text.rfind('page=')\n", | |
"# if idx==-1:\n", | |
"# pageno=1\n", | |
"# else:\n", | |
"# pageno=-1\n", | |
"# for j in range(idx+5,idx+10):\n", | |
"# if not r.text[j].isnumeric():\n", | |
"# pageno=int(r.text[idx+5:j])\n", | |
"# break\n", | |
"# for p in range(1,pageno+1):\n", | |
"# u=f'https://projecteuler.net/thread={i};page={str(p)}'\n", | |
"# r=requests.get(u, cookies=cookies)\n", | |
"# if r.status_code != 200:\n", | |
"# print(f'Error: {r.status_code} {u}')\n", | |
"# continue\n", | |
"# with open(f'results/problem_{i}_page_{p}.html','w') as f:\n", | |
"# f.write(r.text)\n", | |
"# break\n", | |
"# except Exception as e:\n", | |
"# attempt+=1\n", | |
"# print(f'Error: {e} {u}')\n", | |
"# continue" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Problem statement crawler" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Crawl problem statements: fetch the minimal HTML view of each problem and\n", | |
"# save it as problem_statements_html/problem_<id>.html (up to 5 attempts each).\n", | |
"# NOTE(review): `solved_problem_ids` and `cookies` are only assigned in cells\n", | |
"# whose code is currently commented out; define them before running this cell.\n", | |
"import requests\n", | |
"from tqdm.notebook import tqdm\n", | |
"\n", | |
"for i in tqdm(solved_problem_ids):\n", | |
"    # Built outside the try block so the error message below can always show it.\n", | |
"    u=f'https://projecteuler.net/minimal={i}'\n", | |
"    attempt=0\n", | |
"    while attempt<5:\n", | |
"        try:\n", | |
"            r=requests.get(u, cookies=cookies)\n", | |
"            # Treat 4xx/5xx as a failure so an error page is never saved as a statement.\n", | |
"            r.raise_for_status()\n", | |
"            with open(f'problem_statements_html/problem_{i}.html','w') as f:\n", | |
"                f.write(r.text)\n", | |
"            break\n", | |
"        except Exception as e:\n", | |
"            # Any failure (network, HTTP status, file write) uses up one attempt.\n", | |
"            attempt+=1\n", | |
"            print(f'Error: {e} {u}')" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# oeis check..." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import requests\n", | |
"from bs4 import BeautifulSoup\n", | |
"import re\n", | |
"import time\n", | |
"from numpy.random import choice\n", | |
"import random\n", | |
"\n", | |
"def extract_big_integers(input_string):\n", | |
"    \"\"\"Return, in order of appearance, every word-boundary-delimited run of\n", | |
"    seven or more digits in `input_string`, converted to int.\"\"\"\n", | |
"    long_digit_run = r'\\b\\d{7,}\\b'\n", | |
"    return [int(hit.group()) for hit in re.finditer(long_digit_run, input_string)]\n", | |
"\n", | |
"def get_oeis(problem):\n", | |
"    \"\"\"Search OEIS for the large integers found in a saved problem statement.\n", | |
"\n", | |
"    Reads problem_statements_html/problem_<problem>.html, extracts integers of\n", | |
"    7+ digits, queries oeis.org for up to five distinct ones, and writes the\n", | |
"    concatenated HTML of the pages that reported a match to\n", | |
"    oeis/oeis_<problem>.html. Nothing is written when no page is kept.\n", | |
"    Returns None.\n", | |
"\n", | |
"    Exceptions from `open` and `requests` (e.g. FileNotFoundError for a missing\n", | |
"    statement file, HTTPError for a 4xx/5xx reply) propagate to the caller.\n", | |
"    \"\"\"\n", | |
"    # A forward slash works on every OS and avoids the invalid '\\p' escape.\n", | |
"    path=f'problem_statements_html/problem_{problem}.html'\n", | |
"\n", | |
"    with open(path, 'r',encoding='cp1252') as file:\n", | |
"        input_string = file.read()\n", | |
"    # Drop repeated values (order kept) so the same number is never queried twice.\n", | |
"    big_integers = list(dict.fromkeys(extract_big_integers(input_string)))\n", | |
"    if len(big_integers)>5:\n", | |
"        # random.sample draws without replacement and keeps plain Python ints;\n", | |
"        # numpy's choice samples with replacement unless told otherwise.\n", | |
"        big_integers=random.sample(big_integers, 5)\n", | |
"\n", | |
"    result=''\n", | |
"    for i in big_integers:\n", | |
"        url=f'https://oeis.org/search?q={i}&go=Search'\n", | |
"        response = requests.get(url)\n", | |
"        # Fail loudly on 4xx/5xx so an error page is not stored as a search result.\n", | |
"        response.raise_for_status()\n", | |
"        html = response.text\n", | |
"        if not html:\n", | |
"            print('NOTHING')\n", | |
"            time.sleep(5)\n", | |
"        elif \"Sorry, but the terms do not match anything in the table.\" not in html:\n", | |
"            result+=html\n", | |
"        # Pause after every request, including misses, to keep the request rate low.\n", | |
"        time.sleep(random.uniform(0.5, 1.5))\n", | |
"\n", | |
"    if not result:\n", | |
"        return\n", | |
"\n", | |
"    with open(f'oeis/oeis_{problem}.html', 'w',encoding='utf-8') as file:\n", | |
"        file.write(result)\n", | |
"\n", | |
"from tqdm.notebook import tqdm\n", | |
"# Run the OEIS lookup for problems 101-900, allowing up to 5 attempts each.\n", | |
"for i in tqdm(range(101, 901)):\n", | |
"    attempts=0\n", | |
"    while attempts<5:\n", | |
"        try:\n", | |
"            get_oeis(i)\n", | |
"            break\n", | |
"        except FileNotFoundError as e:\n", | |
"            # A missing file will not appear on retry, so skip this problem right away.\n", | |
"            print(f\"Skipping problem {i}: {e}\")\n", | |
"            break\n", | |
"        except Exception as e:\n", | |
"            attempts+=1\n", | |
"            print(f\"Error in problem {i}: {e}, attempt {attempts}\")\n", | |
"            if attempts<5:\n", | |
"                # Wait a little longer before each retry instead of retrying at once.\n", | |
"                time.sleep(attempts)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.12.3" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment