Skip to content

Instantly share code, notes, and snippets.

@jimhester
Last active January 8, 2018 14:50

Revisions

  1. jimhester revised this gist Nov 20, 2013. 1 changed file with 19 additions and 0 deletions.
    19 changes: 19 additions & 0 deletions fasta2.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,19 @@
    #!/usr/bin/env python
    """Print the header and total sequence length of each record in a FASTA file.

    Usage: fasta2.py FILE

    One line is written per record: the header text (without the leading '>')
    followed by a space and the number of sequence characters in that record.
    """

    import sys


    def report(header, sequences):
        """Write one "<header> <length>" line for a finished record."""
        # The single-argument print(...) form yields the same output under the
        # Python 2 print statement that the original script used.
        print("%s %d" % (header, len("".join(sequences))))


    # 'rU' requests universal-newline translation, as in the original script.
    # NOTE(review): the 'U' flag is rejected by Python 3.11+; drop it if this
    # script is ever run on Python 3.
    with open(sys.argv[1], 'rU') as f:
        header = None          # no record has been opened yet
        sequences = []         # sequence lines of the record being collected
        for line in f:
            line = line.rstrip('\n')
            if line.startswith('>'):
                # A new header closes the previous record.  Report it BEFORE
                # replacing `header`; otherwise the length is printed next to
                # the wrong (following) record's name.
                if header is not None:
                    report(header, sequences)
                header = line[1:]
                sequences = []
            else:
                # startswith() is also safe on blank lines, which made the
                # original `line[0]` test raise IndexError.
                sequences.append(line)

        # The final record has no following header to trigger its report.
        if header is not None:
            report(header, sequences)
  2. jimhester revised this gist May 16, 2013. 9 changed files with 60 additions and 34 deletions.
    7 changes: 1 addition & 6 deletions append.pl
    Original file line number Diff line number Diff line change
    @@ -9,15 +9,10 @@
    chomp $line;
    if($line =~ /^>/){
    $header = substr($header,1);
    ##########
    #Process Record
    ##########
    $header = $line;
    $sequence = ''
    print "$sequence\tlength($sequence)\n";
    } else {
    $sequence .= $line;
    }
    }
    ##########
    #Process Last Record
    ##########
    3 changes: 0 additions & 3 deletions fasta-bioperl.pl
    Original file line number Diff line number Diff line change
    @@ -6,8 +6,5 @@
    my $in = Bio::SeqIO->new(-file => shift, '-format' => 'Fasta');

    while(my $rec = $in->next_seq() ){
    ##########
    #Process record
    ##########
    print join(" ",$rec->display_id,$rec->length)."\n";
    }
    3 changes: 0 additions & 3 deletions fasta-bioperl2.pl
    Original file line number Diff line number Diff line change
    @@ -6,8 +6,5 @@
    my $in = Bio::SeqIO->new(-file => shift, -format => 'Fasta', -alphabet => 'dna');

    while(my $rec = $in->next_seq() ){
    ##########
    #Process record
    ##########
    print join(" ",$rec->display_id,$rec->length)."\n";
    }
    3 changes: 0 additions & 3 deletions fasta-biopython.py
    Original file line number Diff line number Diff line change
    @@ -4,7 +4,4 @@
    from Bio import SeqIO

    for record in SeqIO.parse(sys.argv[1],'fasta'):
    ##########
    #Process record
    ##########
    print record.id, len(record)
    5 changes: 1 addition & 4 deletions fasta-bioruby.rb
    Original file line number Diff line number Diff line change
    @@ -4,8 +4,5 @@

    ff = Bio::FlatFile.new(Bio::FastaFormat,ARGF)
    ff.each_entry do |record|
    ##########
    #Process record
    ##########
    puts [record.definition, record.nalen.to_s ].join(" ")
    end
    end
    52 changes: 52 additions & 0 deletions fasta.go
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,52 @@
    package main

    import (
    "bufio"
    "io"
    "fmt"
    "os"
    "strings"
    )

    // fasta is one record as produced by next_seq.
    type fasta struct {
    // header is the part of the record before its first newline.
    header string
    // sequence is the remainder of the record with newlines removed.
    sequence string
    }

    func NewFastxReader(f io.Reader) *FastxReader {
    return &FastxReader{
    r: bufio.NewReader(f),
    }
    }
    type FastxReader struct {
    r *bufio.Reader
    }
    // next_seq returns the next record from the stream.
    //
    // A record starts after a '>' and runs to the next '>' or to the end of
    // input.  The text before the first newline becomes record.header; the rest,
    // with line breaks removed, becomes record.sequence.  When no further record
    // exists the error is io.EOF and the returned record is empty.
    func (r *FastxReader) next_seq() (record fasta, err error) {
        // Skip forward past the '>' that opens the next record.  Anything in
        // front of it (normally nothing) is discarded.
        if _, err = r.r.ReadString('>'); err != nil {
            return record, err
        }

        var str string
        str, err = r.r.ReadString('>')
        switch {
        case err == nil:
            // The '>' just consumed opens the FOLLOWING record.  Put it back so
            // the skip step of the next call finds it; without this every
            // second record would be read and thrown away.
            if err = r.r.UnreadByte(); err != nil {
                return record, err
            }
        case err == io.EOF && len(str) > 0:
            // The last record ends at end of input instead of at a '>'.
            // It is still a complete record; EOF is reported by the next call.
            err = nil
        default:
            return record, err
        }

        str = strings.TrimSuffix(str, ">")
        split_result := strings.SplitN(str, "\n", 2)
        // Drop a carriage return left behind by CRLF line endings.
        record.header = strings.TrimRight(split_result[0], "\r")
        // A header with no newline after it has no sequence part at all, so
        // split_result[1] must not be indexed unconditionally.
        if len(split_result) == 2 {
            record.sequence = strings.NewReplacer("\r", "", "\n", "").Replace(split_result[1])
        }
        return record, nil
    }

    // chomp returns s without its final byte when that byte equals the first
    // byte of delim; otherwise it returns s unchanged.  An empty s or an empty
    // delim is returned as-is instead of indexing out of range.
    func chomp(s string, delim string) string {
        if len(s) == 0 || len(delim) == 0 {
            return s
        }
        if s[len(s)-1] == delim[0] {
            return s[:len(s)-1]
        }
        return s
    }

    // main prints, for every record of the file named by the first command-line
    // argument, the record header and the length of its sequence.
    func main() {
        if len(os.Args) < 2 {
            fmt.Fprintln(os.Stderr, "usage:", os.Args[0], "FILE")
            os.Exit(1)
        }

        file, err := os.Open(os.Args[1])
        if err != nil {
            fmt.Fprintln(os.Stderr, err)
            os.Exit(1)
        }

        // ":=" is required here: fastx was never declared, so plain "=" does
        // not compile.
        fastx := NewFastxReader(file)

        var readErr error
        for {
            rec, err := fastx.next_seq()
            if err != nil {
                readErr = err
                break
            }
            fmt.Println(rec.header, "\t", len(rec.sequence))
        }
        file.Close()

        // io.EOF is the normal end of input; anything else is a real failure.
        if readErr != io.EOF {
            fmt.Fprintln(os.Stderr, readErr)
            os.Exit(1)
        }
    }
    6 changes: 2 additions & 4 deletions fasta.pl
    Original file line number Diff line number Diff line change
    @@ -10,7 +10,5 @@
    my $header = substr($record,0,$newline_loc);
    my $sequence = substr($record,$newline_loc+1);
    $sequence =~ tr/\n//d;
    ##########
    #Process record
    ##########
    }
    print "$sequence\t" . length($sequence) . "\n";
    }
    9 changes: 2 additions & 7 deletions fasta.py
    Original file line number Diff line number Diff line change
    @@ -10,15 +10,10 @@
    line = line.rstrip('\n')
    if(line[0] == '>'):
    header = header[1:]
    ##########
    #Process Record
    ##########
    header = line
    print header, len(sequence)
    sequence = ''
    else:
    sequence += line

    ##########
    #Process Last Record
    ##########

    print header, len(sequence)
    6 changes: 2 additions & 4 deletions fasta.rb
    Original file line number Diff line number Diff line change
    @@ -8,7 +8,5 @@
    header = rec[0..nl-1]
    seq = rec[nl+1..-1]
    seq.gsub!(/\n/,'')
    ##########
    #Process record
    ##########
    end
    puts [header, seq.length].join(" ")
    end
  3. jimhester revised this gist Aug 20, 2012. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion fasta-bioperl2.pl
    Original file line number Diff line number Diff line change
    @@ -3,7 +3,7 @@
    use warnings;use strict;
    use Bio::SeqIO;

    my $in = Bio::SeqIO->new(-file => shift, -format => 'Fasta' -alphabet => 'dna');
    my $in = Bio::SeqIO->new(-file => shift, -format => 'Fasta', -alphabet => 'dna');

    while(my $rec = $in->next_seq() ){
    ##########
  4. jimhester revised this gist Jul 31, 2012. 2 changed files with 3 additions and 3 deletions.
    4 changes: 2 additions & 2 deletions fasta.pl
    Original file line number Diff line number Diff line change
    @@ -1,8 +1,8 @@
    #!/usr/bin/env perl

    use warnings;use strict;use English;
    use warnings;use strict;

    local $RS = ">";
    local $/ = ">";
    my $first = <>;
    while(my $record = <>){
    chomp $record;
    2 changes: 1 addition & 1 deletion fasta.py
    Original file line number Diff line number Diff line change
    @@ -2,7 +2,7 @@

    import os,sys

    f = open(sys.argv[1],'r')
    f = open(sys.argv[1],'rU')
    header = f.readline()
    header = header.rstrip(os.linesep)
    sequence=''
  5. jimhester revised this gist Jul 31, 2012. 1 changed file with 5 additions and 0 deletions.
    5 changes: 5 additions & 0 deletions fasta.py
    Original file line number Diff line number Diff line change
    @@ -17,3 +17,8 @@
    sequence = ''
    else:
    sequence += line

    ##########
    #Process Last Record
    ##########

  6. jimhester revised this gist Jul 24, 2012. 1 changed file with 13 additions and 0 deletions.
    13 changes: 13 additions & 0 deletions fasta-bioperl2.pl
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,13 @@
    #!/usr/bin/env perl

    use warnings;use strict;
    use Bio::SeqIO;

    # Print "<display_id> <length>" for every record of the FASTA file named by
    # the first command-line argument.
    my $file = shift;
    die "usage: $0 FASTA_FILE\n" unless defined $file;

    # The comma between the -format and -alphabet pairs was missing; without it
    # 'Fasta' and -alphabet are parsed as one subtraction expression and the
    # argument list no longer consists of key/value pairs.
    my $in = Bio::SeqIO->new(-file => $file, -format => 'Fasta', -alphabet => 'dna');

    while(my $rec = $in->next_seq() ){
        ##########
        #Process record
        ##########
        print join(" ",$rec->display_id,$rec->length)."\n";
    }
  7. jimhester renamed this gist Jul 24, 2012. 1 changed file with 0 additions and 0 deletions.
    File renamed without changes.
  8. jimhester revised this gist Jul 24, 2012. 3 changed files with 34 additions and 0 deletions.
    13 changes: 13 additions & 0 deletions fasta-bioperl.pl
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,13 @@
    #!/usr/bin/env perl

    use warnings;use strict;
    use Bio::SeqIO;

    # Write one "<display_id> <length>" line per record of the FASTA file given
    # as the first command-line argument.
    my $fasta_path = shift;
    my $seq_stream = Bio::SeqIO->new(
        -file   => $fasta_path,
        -format => 'Fasta',
    );

    while (my $entry = $seq_stream->next_seq) {
        ##########
        #Process record
        ##########
        my @fields = ($entry->display_id, $entry->length);
        print join(" ", @fields), "\n";
    }
    10 changes: 10 additions & 0 deletions fasta-biopython.rb
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,10 @@
    #!/usr/bin/env python

    import sys
    from Bio import SeqIO

    for record in SeqIO.parse(sys.argv[1],'fasta'):
    ##########
    #Process record
    ##########
    print record.id, len(record)
    11 changes: 11 additions & 0 deletions fasta-bioruby.rb
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,11 @@
    #!/usr/bin/env ruby

    require 'bio'

    # Read FASTA entries from ARGF and print "<definition> <nalen>" for each.
    flat_file = Bio::FlatFile.new(Bio::FastaFormat, ARGF)
    flat_file.each_entry { |entry|
      ##########
      #Process record
      ##########
      fields = [entry.definition, entry.nalen.to_s]
      puts fields.join(" ")
    }
  9. jimhester revised this gist Jul 24, 2012. 2 changed files with 25 additions and 25 deletions.
    32 changes: 16 additions & 16 deletions fasta.py
    Original file line number Diff line number Diff line change
    @@ -1,19 +1,19 @@
    #!/usr/bin/env python

    import os,sys$
    import os,sys

    f = open(sys.argv[1],'r')$
    header = f.readline()$
    header = header.rstrip(os.linesep)$
    sequence=''$
    for line in f:$
    line = line.rstrip('\n')$
    if(line[0] == '>'):$
    header = header[1:]$
    ##########$
    #Process Record$
    ##########$
    header = line$
    sequence = ''$
    else:$
    sequence += line$
    f = open(sys.argv[1],'r')
    header = f.readline()
    header = header.rstrip(os.linesep)
    sequence=''
    for line in f:
    line = line.rstrip('\n')
    if(line[0] == '>'):
    header = header[1:]
    ##########
    #Process Record
    ##########
    header = line
    sequence = ''
    else:
    sequence += line
    18 changes: 9 additions & 9 deletions fasta.rb
    Original file line number Diff line number Diff line change
    @@ -1,14 +1,14 @@
    #!/usr/bin/env ruby

    $/ = ">"$
    ARGF.gets$
    while rec = ARGF.gets$
    rec.chomp!$
    nl = rec.index("\n")$
    header = rec[0..nl-1]$
    seq = rec[nl+1..-1]$
    seq.gsub!(/\n/,'')$
    $/ = ">"
    ARGF.gets
    while rec = ARGF.gets
    rec.chomp!
    nl = rec.index("\n")
    header = rec[0..nl-1]
    seq = rec[nl+1..-1]
    seq.gsub!(/\n/,'')
    ##########
    #Process record
    ##########
    end$
    end
  10. jimhester revised this gist Jul 24, 2012. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion append.pl
    Original file line number Diff line number Diff line change
    @@ -10,7 +10,7 @@
    if($line =~ /^>/){
    $header = substr($header,1);
    ##########
    #Process Recordn
    #Process Record
    ##########
    $header = $line;
    $sequence = ''
  11. @invalid-email-address Anonymous created this gist Jul 24, 2012.
    23 changes: 23 additions & 0 deletions append.pl
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,23 @@
    #!/usr/bin/env perl

    use warnings;use strict;

    # Template for line-by-line FASTA parsing.  Sequence lines are appended to
    # $sequence until the next header line (one starting with '>') is seen.  At
    # both "Process" markers below, $header holds the current record's header
    # without its leading '>' and $sequence holds the joined sequence lines.

    my ($header,$sequence);
    $sequence = '';                  # defined even for a record with no sequence lines
    $header = <>;
    exit 0 unless defined $header;   # empty input: nothing to process
    chomp $header;
    $header =~ s/^>//;               # strip the marker once, when the header is stored
    while(my $line = <>){
        chomp $line;
        if($line =~ /^>/){
            ##########
            #Process Record
            ##########
            # Storing the header already stripped keeps the last record (handled
            # after the loop) consistent with every other record.
            $header = substr($line,1);
            $sequence = '';
        } else {
            $sequence .= $line;
        }
    }
    ##########
    #Process Last Record
    ##########
    16 changes: 16 additions & 0 deletions fasta.pl
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,16 @@
    #!/usr/bin/env perl

    use warnings;use strict;use English qw(-no_match_vars);

    # Template for record-at-a-time FASTA parsing: with the input record
    # separator set to ">", every read returns one whole record (header line
    # plus sequence lines) followed by the ">" that opens the next record.

    local $RS = ">";
    my $first = <>;    # discard whatever precedes the first ">" (normally just ">")
    while(my $record = <>){
        chomp $record;     # removes the trailing ">" (chomp strips $RS)
        my $newline_loc = index($record,"\n");
        my ($header, $sequence);
        if ($newline_loc < 0) {
            # index() returns -1 when the record has no newline (a header with
            # nothing after it).  Using -1 as a length would cut the last
            # character off the header and turn the whole record into "sequence".
            $header   = $record;
            $sequence = '';
        }
        else {
            $header   = substr($record,0,$newline_loc);
            $sequence = substr($record,$newline_loc+1);
        }
        $sequence =~ tr/\n//d;
        ##########
        #Process record
        ##########
    }
    19 changes: 19 additions & 0 deletions fasta.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,19 @@
    #!/usr/bin/env python

    import os,sys

    # Template for line-by-line FASTA parsing.  At both "Process" markers below,
    # `header` is the current record's header without the leading '>' and
    # `sequence` is the concatenation of that record's sequence lines.
    # (The stray '$' that ended every code line was an editor artifact and made
    # the file unparseable; it has been removed.)

    f = open(sys.argv[1],'r')
    try:
        header = f.readline()
        header = header.rstrip(os.linesep)
        if header.startswith('>'):
            header = header[1:]
        sequence=''
        for line in f:
            line = line.rstrip('\n')
            # startswith() is safe on blank lines; line[0] raised IndexError.
            if line.startswith('>'):
                ##########
                #Process Record
                ##########
                header = line[1:]
                sequence = ''
            else:
                sequence += line

        ##########
        #Process Last Record
        ##########
    finally:
        f.close()
    14 changes: 14 additions & 0 deletions fasta.rb
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,14 @@
    #!/usr/bin/env ruby

    # Template for record-at-a-time FASTA parsing: with the input record
    # separator set to ">", each ARGF.gets returns one whole record followed by
    # the ">" that opens the next one.  (The stray '$' that ended most lines was
    # an editor artifact and has been removed.)

    $/ = ">"
    ARGF.gets                  # discard whatever precedes the first ">"
    while rec = ARGF.gets
      rec.chomp!               # strips the trailing ">" (chomp! removes $/)
      nl = rec.index("\n")
      if nl.nil?
        # Header with nothing after it: there is no sequence part.
        header = rec
        seq = ''
      else
        # Exclusive range: rec[0..nl-1] becomes rec[0..-1] (the whole record)
        # when the newline is the very first character.
        header = rec[0...nl]
        seq = rec[(nl + 1)..-1]
      end
      seq = seq.delete("\n")
      ##########
      #Process record
      ##########
    end
    11 changes: 11 additions & 0 deletions fasta_example.fa
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,11 @@
    >chr1|this is a fasta header|example
    CCTAAACCCTGAACCCTAAACCCTAAACCCTGAACCCTAAACCCTGAACCCTGAACCCTAAACCCTGAACCCTAAACCTA
    AACCCTGAACCCTGAACCCTAAACCCTGAACCCTAAACCCTAAACCCTAAACCTAAACCCTGAACCCTAAACCTAAACCC
    TGAACCCTAAACCCTAAACCCTGAACCCTAAACCTAAACCCTGAACCCTAACCCCTGAACCCTAAACCCTGAACCCTAAA
    CCCTGAAACCTGAACCCTGAACCCTAAACCCTAAACCCTGAACCCTAAACCCTGAACCCTGAACCCTAAACCCTGAACCC
    >chr2|another record
    TAAACCCTGAACCCTAAACCCTAAACCCTGAACCCTAAACCTAAACCATGAACCCTAAACCCTGAACCCTAAACCCTAAA
    CCCTGAACCCTAAACCCTGAACCCTAAACCTAAACCCTAAACCCTGAACCCTAAACCTGAACCCTAAACCCCTAAACCTA
    AACCCTGAAACCTAAACCCTAAACCCTGAACCCTAAACCCTAAACCCTGAACCCTGAAACCCTGAACCCTAAACCATGAA
    CCCTGAACCCTAAACCCTAAACCCTAAACCCTGAACCCTGAACCCTAAACCTAAACCCTGAACCCTGAACCCTAAACCCT
    GAACCCTAAACCCTAAACCCTGAACCCTAAACCCTGAACCCTAAACCCTGAACCCTGAACCCTAAACCCTGAACCCTAAA