Skip to content

Instantly share code, notes, and snippets.

View apcamargo's full-sized avatar
🦖

Antônio Camargo apcamargo

🦖
View GitHub Profile
from pathlib import Path
from typing import Iterator, Optional, Union
import polars as pl
from needletail import parse_fastx_file
from polars.io.plugins import register_io_source
def scan_fastx(fastx_file: Union[str, Path]) -> pl.LazyFrame:
schema = pl.Schema(
@apcamargo
apcamargo / sam2tsv.py
Created March 10, 2025 03:53
Converts alignments stored in the SAM format to a BLAST-like table
#!/usr/bin/env python
"""
This script processes SAM (Sequence Alignment/Map format) inputs from standard
input and extracts alignment information that is then provided in a tab-separated
table. The following fields are produced: query, target, query_length, query_start,
query_end, target_start, target_end, alignment_length, alignment_identity.
This script was designed for use with SAM files produced by minimap2. However,
it will work with any SAM data that:
@apcamargo
apcamargo / calculate_neff.py
Created November 17, 2024 04:59
Calculate the number of effective sequences (Neff) of an A3M multiple sequence alignment
#!/usr/bin/env python
import math
import re
import click
from scipy.cluster.hierarchy import fcluster, linkage
from skbio import DistanceMatrix, Protein, TabularMSA, io
from skbio.sequence.distance import hamming
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
from coloraide import Color
def lighten(
color: Color,
amount: float,
) -> Color:
"""
Lighten a color by a given amount.
"""
@apcamargo
apcamargo / retrieve_assembly_accession.py
Created September 19, 2023 21:03
Retrieve NCBI assembly accessions from GenBank accessions using E-utilities
import subprocess
def get_assembly_accession(genbank_accession):
p1 = subprocess.Popen(
["elink", "-db", "nuccore", "-target", "assembly", "-id", genbank_accession],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)
p2 = subprocess.Popen(
["efetch", "-format", "docsum"],
#!/usr/bin/env python
# hhblits -v 0 -cpu 1 -n 1 -p 90 -z 0 -Z 5000 -b 0 -B 5000 -M 50 -d busco_db/busco -i msa.faa -o msa.hhr
from collections import namedtuple
from pathlib import Path
import argparse
parser = argparse.ArgumentParser(description='Parse hhsearch hhr output file.')
parser.add_argument('-i', help='input hrr path', dest='input_file',type=str, required=True)
@apcamargo
apcamargo / fancy_fasta_reader.py
Last active August 23, 2024 03:39
Fancy FASTA parser in Python
import bz2
import gzip
import lzma
import textwrap
from contextlib import contextmanager
from enum import Enum, auto
from pathlib import Path
class Compression(Enum):
import bz2
import gzip
import lzma
from contextlib import contextmanager
from enum import Enum, auto
from pathlib import Path
from typing import TextIO
class Compression(Enum):
@apcamargo
apcamargo / ipynb-view.sh
Last active October 25, 2024 21:23
Function for viewing Jupyter notebook (`ipynb`) files in the terminal
# Render a Jupyter notebook (`ipynb`) file as Markdown in the terminal.
#
# Requires jupytext (https://github.com/mwouts/jupytext) and Glow (https://github.com/charmbracelet/glow)
#
# Usage:   ipynb-view <notebook.ipynb>
# Returns: 1 when no notebook path is given; otherwise the exit status of the
#          conversion/rendering pipeline.
ipynb-view () {
    if [ -z "$1" ]; then
        # Diagnostics go to stderr so they never end up in a pipe or a
        # redirected output file.
        echo "Usage: ipynb-view <notebook.ipynb>" >&2
        return 1
    fi
    # jupytext writes the converted Markdown to stdout ("--output -"), which is
    # piped into glow for rendering with its pager enabled.
    jupytext --from ipynb --to markdown --output - "$1" | glow --preserve-new-lines --pager
}