Skip to content

Instantly share code, notes, and snippets.

@idolawoye
Created February 17, 2026 18:51
Show Gist options
  • Select an option

  • Save idolawoye/537036c3bc594f3d4d312a4a32531d2f to your computer and use it in GitHub Desktop.

Select an option

Save idolawoye/537036c3bc594f3d4d312a4a32531d2f to your computer and use it in GitHub Desktop.
Filter a multi-FASTA file by excluding the sequences whose IDs appear in a list (one sequence ID per line)
#!/usr/bin/env python3
"""
Filter sequences from a multifasta file based on an exclusion list.
Usage:
python filter_fasta.py -i input.fasta -e exclude_ids.txt -o output.fasta
The exclude list should have one sequence ID per line (without the leading '>').
Matching is done against the first word of each FASTA header.
"""
import argparse
def load_exclude_ids(exclude_file):
    """Load sequence IDs to exclude from a text file (one per line).

    Each usable line contributes its first whitespace-delimited word, with
    any leading '>' characters removed, so IDs pasted straight from FASTA
    headers are accepted too.

    Args:
        exclude_file: Path of the text file listing the IDs to exclude.

    Returns:
        A set of sequence ID strings (duplicates collapse).
    """
    exclude_ids = set()
    with open(exclude_file, "r") as f:
        for line in f:
            words = line.strip().lstrip(">").split()
            # Blank lines and lines holding nothing but '>' produce no
            # words; skip them instead of indexing into an empty list.
            if words:
                exclude_ids.add(words[0])
    return exclude_ids
def filter_fasta(input_file, exclude_ids, output_file):
    """Write sequences to output, skipping those in exclude_ids.

    A record is a header line starting with '>' plus every following line
    up to the next header. The record ID is the first whitespace-delimited
    word of the header after the leading '>'. Lines that appear before the
    first header are copied through unchanged. A summary of kept and
    excluded record counts is printed when done.

    Args:
        input_file: Path of the multi-FASTA file to read.
        exclude_ids: Container of sequence IDs to drop (membership is
            tested with ``in``).
        output_file: Path of the filtered FASTA file to write.

    Raises:
        ValueError: If input_file and output_file refer to the same file.
    """
    import os  # local import so the function does not rely on file-level imports

    # Opening the output in "w" mode truncates it immediately; if it is the
    # same file as the input, the data would be destroyed before being read.
    if os.path.exists(output_file) and os.path.samefile(input_file, output_file):
        raise ValueError(
            "Input and output must be different files; "
            "writing the output would truncate the input."
        )

    excluded = 0
    kept = 0
    skip = False
    with open(input_file, "r") as fin, open(output_file, "w") as fout:
        for line in fin:
            if line.startswith(">"):
                words = line.strip().lstrip(">").split()
                # A bare '>' header has no ID; treat it as the empty string
                # rather than failing on an empty word list.
                seq_id = words[0] if words else ""
                skip = seq_id in exclude_ids
                if skip:
                    excluded += 1
                else:
                    kept += 1
            # The skip flag set by the most recent header also governs the
            # sequence lines that follow it.
            if not skip:
                fout.write(line)
    print(f"Done! Kept: {kept} sequences | Excluded: {excluded} sequences")
    print(f"Output written to: {output_file}")
def main(argv=None):
    """Parse command-line options and run the exclusion filter.

    Args:
        argv: Optional list of argument strings. When None (the default),
            argparse reads the arguments from sys.argv[1:].
    """
    parser = argparse.ArgumentParser(description="Exclude sequences from a multifasta file.")
    parser.add_argument("-i", "--input", required=True, help="Input multifasta file")
    parser.add_argument("-e", "--exclude", required=True, help="File with sequence IDs to exclude (one per line)")
    parser.add_argument("-o", "--output", required=True, help="Output filtered fasta file")
    args = parser.parse_args(argv)
    try:
        exclude_ids = load_exclude_ids(args.exclude)
        print(f"Loaded {len(exclude_ids)} IDs to exclude.")
        filter_fasta(args.input, exclude_ids, args.output)
    except OSError as err:
        # Missing or unreadable files are user errors: report them the same
        # way argparse reports bad arguments instead of dumping a traceback.
        parser.error(str(err))
# Run the command-line interface only when this file is executed as a
# script, so importing it as a module has no side effects.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment