Created
January 29, 2022 00:00
-
-
Save Jessime/2a72dafbf30daf20a62681602cf2f98f to your computer and use it in GitHub Desktop.
Solving the second degree neighbor problem with Python 3.10
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"id": "0e928ad0", | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2022-01-28T23:33:19.195967Z", | |
"start_time": "2022-01-28T23:33:19.053420Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
">s1\r\n", | |
"AAA\r\n", | |
">s2\r\n", | |
"AAG\r\n", | |
">s3\r\n", | |
"AGG\r\n", | |
">s4\r\n", | |
"TTT\r\n" | |
] | |
} | |
], | |
"source": [ | |
"cat ~/Code/jessime/notebooks/sites.fa" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"id": "b26a63da", | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2022-01-28T23:35:00.772046Z", | |
"start_time": "2022-01-28T23:35:00.764197Z" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"from collections import defaultdict\n", | |
"from pathlib import Path\n", | |
"from itertools import pairwise\n", | |
"\n", | |
"\n", | |
"def get_sequences(input_fasta):\n", | |
" \"\"\"Returns the sequence lines (every second line) of the FASTA file at the given path.\"\"\"\n", | |
" return Path(input_fasta).read_text().splitlines()[1::2]\n", | |
"\n", | |
"\n", | |
"def are_similar(seq1, seq2, limit=5):\n", | |
" \"\"\"Returns True if two sequences match at `limit` or more aligned positions.\"\"\"\n", | |
" # prewritten\n", | |
" return sum(1 for c1, c2 in zip(seq1, seq2) if c1 == c2) >= limit\n", | |
"\n", | |
"\n", | |
"def build_graph(seqs):\n", | |
" \"\"\"Returns a graph linking consecutive sequences that are similar (only adjacent entries are compared).\"\"\"\n", | |
" graph = defaultdict(set)\n", | |
" for seq1, seq2 in pairwise(seqs):\n", | |
" if are_similar(seq1, seq2, 2):\n", | |
" graph[seq1].add(seq2)\n", | |
" graph[seq2].add(seq1)\n", | |
" return graph\n", | |
"\n", | |
"\n", | |
"def is_second_degree(graph, seq1, seq2):\n", | |
" \"\"\"Returns True if a sequence is a second degree neighbor of another sequence.\"\"\"\n", | |
" if any((seq1 not in graph, seq2 not in graph, seq2 in graph[seq1])):\n", | |
" return False\n", | |
" return bool(graph[seq1] & graph[seq2])\n", | |
"\n", | |
"\n", | |
"def run(input_fasta, seq1, seq2):\n", | |
" seqs = get_sequences(input_fasta)\n", | |
" graph = build_graph(seqs)\n", | |
" return is_second_degree(graph, seq1, seq2)\n", | |
"\n", | |
" \n", | |
"assert run('sites.fa', \"AAA\", \"AGG\")\n", | |
"assert run('sites.fa', \"AGG\", \"AAA\")\n", | |
"assert not run('sites.fa', \"AAA\", \"AAG\")\n", | |
"assert not run('sites.fa', \"AAG\", \"AGG\")\n", | |
"assert not run('sites.fa', \"AAA\", \"TTT\")\n", | |
"assert not run('sites.fa', \"HEY\", \"TTT\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "9dea57b3", | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "6f561ee6", | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.10.0" | |
}, | |
"toc": { | |
"base_numbering": 1, | |
"nav_menu": {}, | |
"number_sections": true, | |
"sideBar": true, | |
"skip_h1_title": false, | |
"title_cell": "Table of Contents", | |
"title_sidebar": "Contents", | |
"toc_cell": false, | |
"toc_position": {}, | |
"toc_section_display": true, | |
"toc_window_display": false | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment