zhoujj2013 · July 1, 2014 15:56
diff --git a/StandardizeGeneBankFaFormat.pl b/StandardizeGeneBankFaFormat.pl
 #!/usr/bin/perl -w
 #
 # Normalize the header lines of a GenBank FASTA file (as downloaded from
 # NCBI Batch Entrez) and join each record's sequence onto a single line.
 # The result is written to STDOUT.
 #
 # Usage: perl StandardizeGeneBankFaFormat.pl input.fa > output.fa
 #
 # Example header rewrite:
 #   >gi|634859302|gb|KJ524455.1| some description
 #   >gi_634859302 KJ524455.1 some description

 use strict;
 use warnings;

 my $f = shift; # input fasta file download from batch Entrez (http://www.ncbi.nlm.nih.gov/sites/batchentrez)
 defined $f or die "Usage: $0 <input.fa>\n";

 # Three-argument open with a lexical handle.  "or" is used instead of "||"
 # because "||" would bind to the file name and the die would never fire.
 open my $in, '<', $f or die "Cannot open '$f': $!\n";

 # Database tags whose "|tag|" separator is collapsed to a single space.
 # You can add more tags to this list.
 my @db_tags = qw(gb ref dbj);

 # Skip everything up to and including the first ">" so that the next
 # line-wise read starts on the first header.
 {
     local $/ = ">";
     <$in>;
 }

 # Each iteration reads one header line (terminated by "\n") and then the
 # sequence text up to the next ">".
 while (defined(my $id = <$in>)) {
     # Drop the line terminator; also tolerate CRLF input.
     $id =~ s/\r?\n\z//;

     # replace "gi|634859302|gb|KJ524455.1|" > "gi_634859302|gb|KJ524455.1|"
     $id =~ s/\Agi\|/gi_/;

     # replace "gi_634859302|gb|KJ524455.1|" > "gi_634859302 KJ524455.1|"
     for my $tag (@db_tags) {
         $id =~ s/\|\Q$tag\E\|/ /g;
     }

     # replace "gi_634859302 KJ524455.1| desc" > "gi_634859302 KJ524455.1 desc"
     $id =~ s/\| / /g;
     # replace "gi_634859302 KJ524455.1|" > "gi_634859302 KJ524455.1"
     # (header that ends right after the accession, with no description)
     $id =~ s/\|\z//;

     # get seq: everything up to the next ">" (or end of file)
     my $seq = do {
         local $/ = ">";
         my $chunk = <$in>;
         # A header on the very last line has no sequence after it.
         $chunk = '' unless defined $chunk;
         chomp $chunk;   # removes the trailing ">" (the current $/)
         $chunk;
     };
     $seq =~ tr/\r\n//d; # join the sequence onto one line

     # stdout
     print ">$id\n$seq\n";
 }
 close $in;
	#!/usr/bin/perl -w
	#
	# Normalize the header lines of a GenBank FASTA file (as downloaded from
	# NCBI Batch Entrez) and join each record's sequence onto a single line.
	# The result is written to STDOUT.
	#
	# Usage: perl StandardizeGeneBankFaFormat.pl input.fa > output.fa
	#
	# Example header rewrite:
	#   >gi|634859302|gb|KJ524455.1| some description
	#   >gi_634859302 KJ524455.1 some description

	use strict;
	use warnings;

	my $f = shift; # input fasta file download from batch Entrez (http://www.ncbi.nlm.nih.gov/sites/batchentrez)
	defined $f or die "Usage: $0 <input.fa>\n";

	# Three-argument open with a lexical handle.  "or" is used instead of "||"
	# because "||" would bind to the file name and the die would never fire.
	open my $in, '<', $f or die "Cannot open '$f': $!\n";

	# Database tags whose "|tag|" separator is collapsed to a single space.
	# You can add more tags to this list.
	my @db_tags = qw(gb ref dbj);

	# Skip everything up to and including the first ">" so that the next
	# line-wise read starts on the first header.
	{
	    local $/ = ">";
	    <$in>;
	}

	# Each iteration reads one header line (terminated by "\n") and then the
	# sequence text up to the next ">".
	while (defined(my $id = <$in>)) {
	    # Drop the line terminator; also tolerate CRLF input.
	    $id =~ s/\r?\n\z//;

	    # replace "gi|634859302|gb|KJ524455.1|" > "gi_634859302|gb|KJ524455.1|"
	    $id =~ s/\Agi\|/gi_/;

	    # replace "gi_634859302|gb|KJ524455.1|" > "gi_634859302 KJ524455.1|"
	    for my $tag (@db_tags) {
	        $id =~ s/\|\Q$tag\E\|/ /g;
	    }

	    # replace "gi_634859302 KJ524455.1| desc" > "gi_634859302 KJ524455.1 desc"
	    $id =~ s/\| / /g;
	    # replace "gi_634859302 KJ524455.1|" > "gi_634859302 KJ524455.1"
	    # (header that ends right after the accession, with no description)
	    $id =~ s/\|\z//;

	    # get seq: everything up to the next ">" (or end of file)
	    my $seq = do {
	        local $/ = ">";
	        my $chunk = <$in>;
	        # A header on the very last line has no sequence after it.
	        $chunk = '' unless defined $chunk;
	        chomp $chunk;   # removes the trailing ">" (the current $/)
	        $chunk;
	    };
	    $seq =~ tr/\r\n//d; # join the sequence onto one line

	    # stdout
	    print ">$id\n$seq\n";
	}
	close $in;