Created
January 29, 2023 19:21
-
-
Save mariogeiger/c0d4fb3527c5876543e66793c612ec93 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 37, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import logging\n", | |
"import os\n", | |
"import zipfile\n", | |
"from functools import cache\n", | |
"from typing import List\n", | |
"from urllib.request import urlopen\n", | |
"\n", | |
"from ase.atoms import Atoms\n", | |
"\n", | |
"\n", | |
"def download_url(url: str, root: str) -> str:\n", | |
" \"\"\"Download if file does not exist in root already. Returns path to file.\"\"\"\n", | |
" filename = url.rpartition(\"/\")[2]\n", | |
" file_path = os.path.join(root, filename)\n", | |
"\n", | |
" try:\n", | |
" from tqdm import tqdm\n", | |
"\n", | |
" progress = True\n", | |
" except ImportError:\n", | |
" progress = False\n", | |
"\n", | |
" data = urlopen(url)\n", | |
" chunk_size = 1024\n", | |
" total_size = int(data.info()[\"Content-Length\"].strip())\n", | |
"\n", | |
" if os.path.exists(file_path):\n", | |
" if os.path.getsize(file_path) == total_size:\n", | |
" logging.info(f\"Using downloaded and verified file: {file_path}\")\n", | |
" return file_path\n", | |
"\n", | |
" logging.info(f\"Downloading {url} to {file_path}\")\n", | |
"\n", | |
" with open(file_path, \"wb\") as f:\n", | |
" if progress:\n", | |
" with tqdm(total=total_size) as pbar:\n", | |
" while True:\n", | |
" chunk = data.read(chunk_size)\n", | |
" if not chunk:\n", | |
" break\n", | |
" f.write(chunk)\n", | |
" pbar.update(chunk_size)\n", | |
" else:\n", | |
" while True:\n", | |
" chunk = data.read(chunk_size)\n", | |
" if not chunk:\n", | |
" break\n", | |
" f.write(chunk)\n", | |
"\n", | |
" return file_path\n", | |
"\n", | |
"\n", | |
"def extract_zip(path: str, root: str):\n", | |
" \"\"\"Extract zip if content does not exist in root already.\"\"\"\n", | |
" logging.info(f\"Extracting {path} to {root}...\")\n", | |
" with zipfile.ZipFile(path, \"r\") as f:\n", | |
" for name in f.namelist():\n", | |
" if name.endswith(\"/\"):\n", | |
" logging.info(f\"Skip directory {name}\")\n", | |
" continue\n", | |
" out_path = os.path.join(root, name)\n", | |
" file_size = f.getinfo(name).file_size\n", | |
" if os.path.exists(out_path) and os.path.getsize(out_path) == file_size:\n", | |
" logging.info(f\"Skip existing file {name}\")\n", | |
" continue\n", | |
" logging.info(f\"Extracting {name} to {root}...\")\n", | |
" f.extract(name, root)\n", | |
"\n", | |
"\n", | |
"def read_sdf(f):\n", | |
" while True:\n", | |
" name = f.readline()\n", | |
" if not name:\n", | |
" break\n", | |
"\n", | |
" f.readline()\n", | |
" f.readline()\n", | |
"\n", | |
" L1 = f.readline().split()\n", | |
" try:\n", | |
" natoms = int(L1[0])\n", | |
" except IndexError:\n", | |
" print(L1)\n", | |
" break\n", | |
"\n", | |
" positions = []\n", | |
" symbols = []\n", | |
" for _ in range(natoms):\n", | |
" line = f.readline()\n", | |
" x, y, z, symbol = line.split()[:4]\n", | |
" symbols.append(symbol)\n", | |
" positions.append([float(x), float(y), float(z)])\n", | |
"\n", | |
" yield Atoms(symbols=symbols, positions=positions)\n", | |
"\n", | |
" while True:\n", | |
" line = f.readline()\n", | |
" if line.startswith(\"$$$$\"):\n", | |
" break\n", | |
"\n", | |
"\n", | |
"@cache\n", | |
"def load_qm9(root: str) -> List[Atoms]:\n", | |
" raw_url = \"https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/molnet_publish/qm9.zip\"\n", | |
"\n", | |
" if not os.path.exists(root):\n", | |
" os.makedirs(root)\n", | |
"\n", | |
" path = download_url(raw_url, root)\n", | |
" extract_zip(path, root)\n", | |
"\n", | |
" with open(os.path.join(root, \"gdb9.sdf\")) as f:\n", | |
" return list(read_sdf(f))\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 51, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"66814 molecules with 4 kinds of atoms: O, H, C, N\n", | |
"45770 molecules with 3 kinds of atoms: O, H, C\n", | |
"14198 molecules with 3 kinds of atoms: H, C, N\n", | |
"4907 molecules with 2 kinds of atoms: H, C\n", | |
"1062 molecules with 5 kinds of atoms: H, C, N, O, F\n", | |
"736 molecules with 4 kinds of atoms: F, H, C, N\n", | |
"250 molecules with 4 kinds of atoms: O, F, C, H\n", | |
"96 molecules with 3 kinds of atoms: F, C, H\n", | |
"26 molecules with 3 kinds of atoms: O, C, N\n", | |
"14 molecules with 4 kinds of atoms: O, F, C, N\n", | |
"4 molecules with 2 kinds of atoms: C, N\n", | |
"3 molecules with 3 kinds of atoms: F, C, N\n", | |
"2 molecules with 2 kinds of atoms: F, C\n", | |
"1 molecules with 2 kinds of atoms: H, N\n", | |
"1 molecules with 2 kinds of atoms: O, H\n", | |
"1 molecules with 2 kinds of atoms: O, N\n" | |
] | |
} | |
], | |
"source": [ | |
"import numpy as np\n", | |
"from collections import defaultdict\n", | |
"import ase.data\n", | |
"\n", | |
"kinds = defaultdict(list)\n", | |
"\n", | |
"for mol in load_qm9(\"data\"):\n", | |
" compounds = frozenset(mol.numbers)\n", | |
" kinds[compounds].append(mol)\n", | |
"\n", | |
"for compounds, mols in sorted(kinds.items(), key=lambda x: len(x[1]), reverse=True):\n", | |
" symbols = \", \".join(ase.data.chemical_symbols[z] for z in compounds)\n", | |
" print(f\"{len(mols)} molecules with {len(compounds)} kinds of atoms: {symbols}\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 52, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<html>\n", | |
"\n", | |
" <head>\n", | |
"\n", | |
" <title>ASE atomic visualization</title>\n", | |
"\n", | |
" <link rel=\"stylesheet\" type=\"text/css\"\n", | |
"\n", | |
" href=\"https://www.x3dom.org/x3dom/release/x3dom.css\">\n", | |
"\n", | |
" </link>\n", | |
"\n", | |
" <script type=\"text/javascript\"\n", | |
"\n", | |
" src=\"https://www.x3dom.org/x3dom/release/x3dom.js\">\n", | |
"\n", | |
" </script>\n", | |
"\n", | |
" </head>\n", | |
"\n", | |
" <body>\n", | |
"\n", | |
" <X3D>\n", | |
"\n", | |
" <Scene>\n", | |
"\n", | |
" <Transform translation=\"-0.02 1.53 0.01\">\n", | |
"\n", | |
" <Shape>\n", | |
"\n", | |
" <Appearance>\n", | |
"\n", | |
" <Material diffuseColor=\"0.565 0.565 0.565\" specularColor=\"0.5 0.5 0.5\">\n", | |
"\n", | |
" </Material>\n", | |
"\n", | |
" </Appearance>\n", | |
"\n", | |
" <Sphere radius=\"0.76\">\n", | |
"\n", | |
" </Sphere>\n", | |
"\n", | |
" </Shape>\n", | |
"\n", | |
" </Transform>\n", | |
"\n", | |
" <Transform translation=\"0.00 -0.00 0.00\">\n", | |
"\n", | |
" <Shape>\n", | |
"\n", | |
" <Appearance>\n", | |
"\n", | |
" <Material diffuseColor=\"0.565 0.565 0.565\" specularColor=\"0.5 0.5 0.5\">\n", | |
"\n", | |
" </Material>\n", | |
"\n", | |
" </Appearance>\n", | |
"\n", | |
" <Sphere radius=\"0.76\">\n", | |
"\n", | |
" </Sphere>\n", | |
"\n", | |
" </Shape>\n", | |
"\n", | |
" </Transform>\n", | |
"\n", | |
" <Transform translation=\"0.99 1.94 0.00\">\n", | |
"\n", | |
" <Shape>\n", | |
"\n", | |
" <Appearance>\n", | |
"\n", | |
" <Material diffuseColor=\"1.000 1.000 1.000\" specularColor=\"0.5 0.5 0.5\">\n", | |
"\n", | |
" </Material>\n", | |
"\n", | |
" </Appearance>\n", | |
"\n", | |
" <Sphere radius=\"0.31\">\n", | |
"\n", | |
" </Sphere>\n", | |
"\n", | |
" </Shape>\n", | |
"\n", | |
" </Transform>\n", | |
"\n", | |
" <Transform translation=\"-0.54 1.92 -0.87\">\n", | |
"\n", | |
" <Shape>\n", | |
"\n", | |
" <Appearance>\n", | |
"\n", | |
" <Material diffuseColor=\"1.000 1.000 1.000\" specularColor=\"0.5 0.5 0.5\">\n", | |
"\n", | |
" </Material>\n", | |
"\n", | |
" </Appearance>\n", | |
"\n", | |
" <Sphere radius=\"0.31\">\n", | |
"\n", | |
" </Sphere>\n", | |
"\n", | |
" </Shape>\n", | |
"\n", | |
" </Transform>\n", | |
"\n", | |
" <Transform translation=\"-0.53 1.91 0.90\">\n", | |
"\n", | |
" <Shape>\n", | |
"\n", | |
" <Appearance>\n", | |
"\n", | |
" <Material diffuseColor=\"1.000 1.000 1.000\" specularColor=\"0.5 0.5 0.5\">\n", | |
"\n", | |
" </Material>\n", | |
"\n", | |
" </Appearance>\n", | |
"\n", | |
" <Sphere radius=\"0.31\">\n", | |
"\n", | |
" </Sphere>\n", | |
"\n", | |
" </Shape>\n", | |
"\n", | |
" </Transform>\n", | |
"\n", | |
" <Transform translation=\"0.53 -0.40 0.88\">\n", | |
"\n", | |
" <Shape>\n", | |
"\n", | |
" <Appearance>\n", | |
"\n", | |
" <Material diffuseColor=\"1.000 1.000 1.000\" specularColor=\"0.5 0.5 0.5\">\n", | |
"\n", | |
" </Material>\n", | |
"\n", | |
" </Appearance>\n", | |
"\n", | |
" <Sphere radius=\"0.31\">\n", | |
"\n", | |
" </Sphere>\n", | |
"\n", | |
" </Shape>\n", | |
"\n", | |
" </Transform>\n", | |
"\n", | |
" <Transform translation=\"-1.01 -0.42 0.01\">\n", | |
"\n", | |
" <Shape>\n", | |
"\n", | |
" <Appearance>\n", | |
"\n", | |
" <Material diffuseColor=\"1.000 1.000 1.000\" specularColor=\"0.5 0.5 0.5\">\n", | |
"\n", | |
" </Material>\n", | |
"\n", | |
" </Appearance>\n", | |
"\n", | |
" <Sphere radius=\"0.31\">\n", | |
"\n", | |
" </Sphere>\n", | |
"\n", | |
" </Shape>\n", | |
"\n", | |
" </Transform>\n", | |
"\n", | |
" <Transform translation=\"0.51 -0.39 -0.89\">\n", | |
"\n", | |
" <Shape>\n", | |
"\n", | |
" <Appearance>\n", | |
"\n", | |
" <Material diffuseColor=\"1.000 1.000 1.000\" specularColor=\"0.5 0.5 0.5\">\n", | |
"\n", | |
" </Material>\n", | |
"\n", | |
" </Appearance>\n", | |
"\n", | |
" <Sphere radius=\"0.31\">\n", | |
"\n", | |
" </Sphere>\n", | |
"\n", | |
" </Shape>\n", | |
"\n", | |
" </Transform>\n", | |
"\n", | |
" </Scene>\n", | |
"\n", | |
" </X3D>\n", | |
"\n", | |
" </body>\n", | |
"\n", | |
"</html>\n", | |
"\n" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"execution_count": 52, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"from ase.visualize import view\n", | |
"\n", | |
"view(kinds[frozenset([1, 6])][2], viewer=\"x3d\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "base", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.10.9" | |
}, | |
"orig_nbformat": 4, | |
"vscode": { | |
"interpreter": { | |
"hash": "f26faf9d33dc8b83cd077f62f5d9010e5bc51611e479f12b96223e2da63ba699" | |
} | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment