Last active
January 8, 2018 14:50
Revisions
-
jimhester revised this gist
Nov 20, 2013 . 1 changed file with 19 additions and 0 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,19 @@ #!/usr/bin/env python import os,sys f = open(sys.argv[1],'rU') header = f.readline() header = header.rstrip(os.linesep) sequences=[] for line in f: line = line.rstrip('\n') if(line[0] == '>'): header = header[1:] header = line print header, len("".join(sequences)) sequences = [] else: sequences.append(line) print header, len("".join(sequences)) -
jimhester revised this gist
May 16, 2013 . 9 changed files with 60 additions and 34 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -9,15 +9,10 @@ chomp $line; if($line =~ /^>/){ $header = substr($header,1); $header = $line; $sequence = '' print "$sequence\tlength($sequence)\n"; } else { $sequence .= $line; } } This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -6,8 +6,5 @@ my $in = Bio::SeqIO->new(-file => shift, '-format' => 'Fasta'); while(my $rec = $in->next_seq() ){ print join(" ",$rec->display_id,$rec->length)."\n"; } This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -6,8 +6,5 @@ my $in = Bio::SeqIO->new(-file => shift, -format => 'Fasta', -alphabet => 'dna'); while(my $rec = $in->next_seq() ){ print join(" ",$rec->display_id,$rec->length)."\n"; } This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -4,7 +4,4 @@ from Bio import SeqIO for record in SeqIO.parse(sys.argv[1],'fasta'): print record.id, len(record) This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -4,8 +4,5 @@ ff = Bio::FlatFile.new(Bio::FastaFormat,ARGF) ff.each_entry do |record| puts [record.definition, record.nalen.to_s ].join(" ") end This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,52 @@ package main import ( "bufio" "io" "fmt" "os" "strings" ) type fasta struct { header string sequence string } func NewFastxReader(f io.Reader) *FastxReader { return &FastxReader{ r: bufio.NewReader(f), } } type FastxReader struct { r *bufio.Reader } func (r *FastxReader) next_seq() (record fasta, err error) { var str string if str, err = r.r.ReadString('>'); err == nil { if str, err = r.r.ReadString('>'); err == nil { split_result := strings.SplitN(str, "\n", 2) record.header = split_result[0] //remove newlines and trailing > record.sequence = chomp(strings.Replace(split_result[1], "\n", "", -1), ">") } } return record, err } //remove last char in a string if that char is the delim func chomp(s string, delim string) string { if s[len(s)-1] == delim[0] { return s[0:len(s)-1] } return s } func main() { file, _ := os.Open(os.Args[1]) fastx = NewFastxReader(file) for rec, err := fastx.next_seq(); err == nil; { fmt.Println(rec.header, "\t", len(rec.sequence)) rec, err = fastx.next_seq(); } } This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -10,7 +10,5 @@ my $header = substr($record,0,$newline_loc); my $sequence = substr($record,$newline_loc+1); $sequence =~ tr/\n//d; print "$sequence\t" . length($sequence) . "\n"; } This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -10,15 +10,10 @@ line = line.rstrip('\n') if(line[0] == '>'): header = header[1:] header = line print header, len(sequence) sequence = '' else: sequence += line print header, len(sequence) This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -8,7 +8,5 @@ header = rec[0..nl-1] seq = rec[nl+1..-1] seq.gsub!(/\n/,'') puts [header, seq.length].join(" ") end -
jimhester revised this gist
Aug 20, 2012 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -3,7 +3,7 @@ use warnings;use strict; use Bio::SeqIO; my $in = Bio::SeqIO->new(-file => shift, -format => 'Fasta', -alphabet => 'dna'); while(my $rec = $in->next_seq() ){ ########## -
jimhester revised this gist
Jul 31, 2012 . 2 changed files with 3 additions and 3 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,8 +1,8 @@ #!/usr/bin/env perl use warnings;use strict; local $/ = ">"; my $first = <>; while(my $record = <>){ chomp $record; This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -2,7 +2,7 @@ import os,sys f = open(sys.argv[1],'rU') header = f.readline() header = header.rstrip(os.linesep) sequence='' -
jimhester revised this gist
Jul 31, 2012 . 1 changed file with 5 additions and 0 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -17,3 +17,8 @@ sequence = '' else: sequence += line ########## #Process Last Record ########## -
jimhester revised this gist
Jul 24, 2012 . 1 changed file with 13 additions and 0 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,13 @@ #!/usr/bin/env perl use warnings;use strict; use Bio::SeqIO; my $in = Bio::SeqIO->new(-file => shift, -format => 'Fasta' -alphabet => 'dna'); while(my $rec = $in->next_seq() ){ ########## #Process record ########## print join(" ",$rec->display_id,$rec->length)."\n"; } -
jimhester renamed this gist
Jul 24, 2012 . 1 changed file with 0 additions and 0 deletions.There are no files selected for viewing
File renamed without changes. -
jimhester revised this gist
Jul 24, 2012 . 3 changed files with 34 additions and 0 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,13 @@ #!/usr/bin/env perl use warnings;use strict; use Bio::SeqIO; my $in = Bio::SeqIO->new(-file => shift, '-format' => 'Fasta'); while(my $rec = $in->next_seq() ){ ########## #Process record ########## print join(" ",$rec->display_id,$rec->length)."\n"; } This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,10 @@ #!/usr/bin/env python import sys from Bio import SeqIO for record in SeqIO.parse(sys.argv[1],'fasta'): ########## #Process record ########## print record.id, len(record) This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,11 @@ #!/usr/bin/env ruby require 'bio' ff = Bio::FlatFile.new(Bio::FastaFormat,ARGF) ff.each_entry do |record| ########## #Process record ########## puts [record.definition, record.nalen.to_s ].join(" ") end -
jimhester revised this gist
Jul 24, 2012 . 2 changed files with 25 additions and 25 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,19 +1,19 @@ #!/usr/bin/env python import os,sys f = open(sys.argv[1],'r') header = f.readline() header = header.rstrip(os.linesep) sequence='' for line in f: line = line.rstrip('\n') if(line[0] == '>'): header = header[1:] ########## #Process Record ########## header = line sequence = '' else: sequence += line This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,14 +1,14 @@ #!/usr/bin/env ruby $/ = ">" ARGF.gets while rec = ARGF.gets rec.chomp! nl = rec.index("\n") header = rec[0..nl-1] seq = rec[nl+1..-1] seq.gsub!(/\n/,'') ########## #Process record ########## end -
jimhester revised this gist
Jul 24, 2012 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -10,7 +10,7 @@ if($line =~ /^>/){ $header = substr($header,1); ########## #Process Record ########## $header = $line; $sequence = '' -
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,23 @@ #!/usr/bin/env perl use warnings;use strict; my ($header,$sequence); $header = <>; chomp $header; while(my $line = <>){ chomp $line; if($line =~ /^>/){ $header = substr($header,1); ########## #Process Recordn ########## $header = $line; $sequence = '' } else { $sequence .= $line; } } ########## #Process Last Record ########## This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,16 @@ #!/usr/bin/env perl use warnings;use strict;use English; local $RS = ">"; my $first = <>; while(my $record = <>){ chomp $record; my $newline_loc = index($record,"\n"); my $header = substr($record,0,$newline_loc); my $sequence = substr($record,$newline_loc+1); $sequence =~ tr/\n//d; ########## #Process record ########## } This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,19 @@ #!/usr/bin/env python import os,sys$ f = open(sys.argv[1],'r')$ header = f.readline()$ header = header.rstrip(os.linesep)$ sequence=''$ for line in f:$ line = line.rstrip('\n')$ if(line[0] == '>'):$ header = header[1:]$ ##########$ #Process Record$ ##########$ header = line$ sequence = ''$ else:$ sequence += line$ This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,14 @@ #!/usr/bin/env ruby $/ = ">"$ ARGF.gets$ while rec = ARGF.gets$ rec.chomp!$ nl = rec.index("\n")$ header = rec[0..nl-1]$ seq = rec[nl+1..-1]$ seq.gsub!(/\n/,'')$ ########## #Process record ########## end$ This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,11 @@ >chr1|this is a fasta header|example CCTAAACCCTGAACCCTAAACCCTAAACCCTGAACCCTAAACCCTGAACCCTGAACCCTAAACCCTGAACCCTAAACCTA AACCCTGAACCCTGAACCCTAAACCCTGAACCCTAAACCCTAAACCCTAAACCTAAACCCTGAACCCTAAACCTAAACCC TGAACCCTAAACCCTAAACCCTGAACCCTAAACCTAAACCCTGAACCCTAACCCCTGAACCCTAAACCCTGAACCCTAAA CCCTGAAACCTGAACCCTGAACCCTAAACCCTAAACCCTGAACCCTAAACCCTGAACCCTGAACCCTAAACCCTGAACCC >chr2|another record TAAACCCTGAACCCTAAACCCTAAACCCTGAACCCTAAACCTAAACCATGAACCCTAAACCCTGAACCCTAAACCCTAAA CCCTGAACCCTAAACCCTGAACCCTAAACCTAAACCCTAAACCCTGAACCCTAAACCTGAACCCTAAACCCCTAAACCTA AACCCTGAAACCTAAACCCTAAACCCTGAACCCTAAACCCTAAACCCTGAACCCTGAAACCCTGAACCCTAAACCATGAA CCCTGAACCCTAAACCCTAAACCCTAAACCCTGAACCCTGAACCCTAAACCTAAACCCTGAACCCTGAACCCTAAACCCT GAACCCTAAACCCTAAACCCTGAACCCTAAACCCTGAACCCTAAACCCTGAACCCTGAACCCTAAACCCTGAACCCTAAA