Skip to content

Instantly share code, notes, and snippets.

@jimhester
Last active January 8, 2018 14:50
Show Gist options
  • Save jimhester/3169859 to your computer and use it in GitHub Desktop.
Save jimhester/3169859 to your computer and use it in GitHub Desktop.
Parsing FASTA files in Perl, Ruby, Python, and Go
#!/usr/bin/env perl
# Line-by-line FASTA reader.
#
# Reads the files named on the command line (or STDIN) through the <>
# operator and accumulates one record at a time:
#   $header   - the header line without its leading '>'
#   $sequence - the record's sequence lines concatenated, line endings removed
# Fill in the two "Process" placeholders below; both see the same
# $header/$sequence shape, including the final record.
use warnings;
use strict;

my $header;           # undef until the first '>' line has been seen
my $sequence = '';

while (my $line = <>) {
    chomp $line;
    next if $line eq '';    # blank lines carry no sequence data

    if ($line =~ /^>/) {
        # A new header ends the previous record, if there was one.
        if (defined $header) {
            ##########
            #Process Record
            ##########
        }

        # Strip the '>' when the header is stored so that every record,
        # including the last one, is handed over in the same form.
        $header   = substr($line, 1);
        $sequence = '';
    }
    else {
        # Sequence text found before any header is accumulated here but
        # thrown away when the first header resets $sequence.
        $sequence .= $line;
    }
}

# The final record has no following header to trigger processing; empty
# input leaves $header undefined and is skipped.
if (defined $header) {
    ##########
    #Process Last Record
    ##########
}
#!/usr/bin/env perl
# Print "<display_id> <length>" for every record of a FASTA file, read with
# BioPerl's Bio::SeqIO. The file name is the first command-line argument.
use warnings;
use strict;
use Bio::SeqIO;

my $fasta_path = shift;
my $reader     = Bio::SeqIO->new(-file => $fasta_path, -format => 'Fasta');

while (my $seq = $reader->next_seq()) {
    ##########
    #Process record
    ##########
    my @fields = ($seq->display_id, $seq->length);
    print "@fields\n";    # array interpolation joins with a single space
}
#!/usr/bin/env perl
# Print "<display_id> <length>" for every record of a FASTA file, read with
# BioPerl's Bio::SeqIO. The file name is the first command-line argument;
# the reader is additionally given -alphabet => 'dna'.
use warnings;
use strict;
use Bio::SeqIO;

# The comma between the -format and -alphabet pairs is required: without it
# Perl parses  'Fasta' -alphabet  as a subtraction and the argument list
# passed to the constructor is corrupted.
my $in = Bio::SeqIO->new(
    -file     => shift,
    -format   => 'Fasta',
    -alphabet => 'dna',
);

while (my $rec = $in->next_seq()) {
    ##########
    #Process record
    ##########
    print join(" ", $rec->display_id, $rec->length) . "\n";
}
#!/usr/bin/env python
import sys
from Bio import SeqIO
for record in SeqIO.parse(sys.argv[1],'fasta'):
##########
#Process record
##########
print record.id, len(record)
#!/usr/bin/env ruby
# Print each FASTA entry's definition and its `nalen` value, separated by a
# space, using BioRuby. Input comes from ARGF.
require 'bio'

fasta_reader = Bio::FlatFile.new(Bio::FastaFormat, ARGF)
fasta_reader.each_entry do |entry|
  ##########
  #Process record
  ##########
  fields = [entry.definition, entry.nalen.to_s]
  puts fields.join(" ")
end
#!/usr/bin/env perl
# Record-at-a-time FASTA reader.
#
# Sets the input record separator to '>' so that every read from <> returns
# one whole record: the header line followed by its sequence lines. Inside
# the loop:
#   $header   - the header line without the leading '>'
#   $sequence - the sequence with all newlines removed
#
# NOTE: because the separator is a bare '>', a '>' character inside a header
# line also splits the record.
use warnings;
use strict;
use English;

local $RS = ">";    # $RS is English.pm's name for $/

# The first read returns everything up to and including the first '>'
# (normally just the '>' itself); it is not a record.
my $first = <>;

while (my $record = <>) {
    chomp $record;    # removes the trailing '>' separator, if present

    my $newline_loc = index($record, "\n");

    # index() returns -1 when the record has no line break at all (for
    # example a final header with no sequence and no trailing newline).
    # Treat the whole record as the header in that case; passing -1 to
    # substr() would chop the header's last character and copy the header
    # into the sequence.
    my ($header, $sequence) =
        $newline_loc < 0
        ? ($record, '')
        : (substr($record, 0, $newline_loc), substr($record, $newline_loc + 1));

    $sequence =~ tr/\n//d;    # join the wrapped sequence lines

    ##########
    #Process record
    ##########
}
#!/usr/bin/env python
"""Line-by-line FASTA reader.

Reads the file named by the first command-line argument and accumulates one
record at a time:

    header   -- the header line without its leading '>'
    sequence -- the record's sequence lines concatenated, newlines removed

Fill in the two "Process" placeholders; both see the same header/sequence
shape, including the final record.
"""
import sys

header = None      # None until the first '>' line has been seen
sequence = ''

# The with-block closes the file even if processing raises.
with open(sys.argv[1], 'r') as f:
    for line in f:
        line = line.rstrip('\n')
        if not line:
            # Blank lines carry no data; indexing line[0] on them would
            # raise IndexError.
            continue
        if line.startswith('>'):
            # A new header ends the previous record, if there was one.
            if header is not None:
                ##########
                #Process Record
                ##########
                pass
            # Strip the '>' when the header is stored so the last record is
            # handed over in the same form as all the others.
            header = line[1:]
            sequence = ''
        else:
            sequence += line

# The final record has no following header to trigger processing; an empty
# file leaves header as None and is skipped.
if header is not None:
    ##########
    #Process Last Record
    ##########
    pass
#!/usr/bin/env ruby
# Record-at-a-time FASTA reader.
#
# Reads ARGF in chunks terminated by '>' so that each chunk is one whole
# record: the header line followed by its sequence lines. Inside the loop:
#   header - the header line without the leading '>'
#   seq    - the sequence with all newlines removed
#
# NOTE: because the separator is a bare '>', a '>' character inside a header
# line also splits the record.

# The separator is passed explicitly to gets/chomp! instead of assigning the
# global $/, so line-based reads elsewhere in the program are unaffected.
record_sep = ">"

# The first read returns everything up to and including the first '>'
# (normally just the '>' itself); it is not a record.
ARGF.gets(record_sep)

while (rec = ARGF.gets(record_sep))
  rec.chomp!(record_sep)

  # Split at the first newline only. A record without any newline yields no
  # second element (and an empty record yields none at all), so default both
  # parts to '' rather than doing arithmetic on a nil index.
  header, seq = rec.split("\n", 2)
  header ||= ''
  seq = (seq || '').delete("\n")

  ##########
  #Process record
  ##########
end
>chr1|this is a fasta header|example
CCTAAACCCTGAACCCTAAACCCTAAACCCTGAACCCTAAACCCTGAACCCTGAACCCTAAACCCTGAACCCTAAACCTA
AACCCTGAACCCTGAACCCTAAACCCTGAACCCTAAACCCTAAACCCTAAACCTAAACCCTGAACCCTAAACCTAAACCC
TGAACCCTAAACCCTAAACCCTGAACCCTAAACCTAAACCCTGAACCCTAACCCCTGAACCCTAAACCCTGAACCCTAAA
CCCTGAAACCTGAACCCTGAACCCTAAACCCTAAACCCTGAACCCTAAACCCTGAACCCTGAACCCTAAACCCTGAACCC
>chr2|another record
TAAACCCTGAACCCTAAACCCTAAACCCTGAACCCTAAACCTAAACCATGAACCCTAAACCCTGAACCCTAAACCCTAAA
CCCTGAACCCTAAACCCTGAACCCTAAACCTAAACCCTAAACCCTGAACCCTAAACCTGAACCCTAAACCCCTAAACCTA
AACCCTGAAACCTAAACCCTAAACCCTGAACCCTAAACCCTAAACCCTGAACCCTGAAACCCTGAACCCTAAACCATGAA
CCCTGAACCCTAAACCCTAAACCCTAAACCCTGAACCCTGAACCCTAAACCTAAACCCTGAACCCTGAACCCTAAACCCT
GAACCCTAAACCCTAAACCCTGAACCCTAAACCCTGAACCCTAAACCCTGAACCCTGAACCCTAAACCCTGAACCCTAAA
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment