Skip to content

Instantly share code, notes, and snippets.

@terrencehan
Created October 23, 2012 12:02
Show Gist options
  • Save terrencehan/3938401 to your computer and use it in GitHub Desktop.
Save terrencehan/3938401 to your computer and use it in GitHub Desktop.
#!/usr/bin/env perl
use 5.010;
use strict;
use List::MoreUtils qw(pairwise);
use Text::Extract::Word;
# Format an integer with at least three digits, left-padding with zeros
# (1 -> "001", 12 -> "012"). The "%.3d" precision is only a minimum, so
# values that already have more than three digits are returned unshortened.
sub foo {
    my ($number) = @_;
    return sprintf( '%.3d', $number );
}
# Print a report for one section of the extracted text.
#
# A section is a header line followed by zero or more sequence lines.
#   * The name is taken from the header: the first token after "> ".
#   * The sequence lines are joined and scanned for maximal runs of
#     lower-case letters and maximal runs of upper-case letters
#     (presumably introns and exons - confirm against the input format).
#   * Each lower-case run is emitted as "I" + 3-digit running number + run.
#   * For each pair of consecutive upper-case runs a junction is emitted as
#     "E" + label + last 10 letters of the first run + first 10 letters of
#     the second run. The label is the two 1-based run numbers concatenated
#     and zero-padded by foo(), e.g. runs 1 and 2 give "E012".
#
# Output goes to the currently selected handle: the name, then lower-case
# entries interleaved with junction entries, then an empty line.
#
# Parameter: $str - the section text.
# Returns:   nothing meaningful. Sections without any non-blank character
#            are skipped and produce no output.
sub handle_section {
    my ($str) = @_;

    # An empty section (e.g. from consecutive blank lines) has nothing to
    # report; skip it instead of printing blank lines.
    return unless defined $str && $str =~ /\S/;

    # First line (with its newline, if any) is the header; the rest is the
    # sequence. A named variable is used rather than "my $_", which no longer
    # compiles on Perl 5.24 and later.
    my ( $header, $sequence ) = $str =~ /\A([^\n]*\n?)(.*)\z/s;
    $sequence =~ tr/\n//d;

    # The name ends at the next whitespace, or at end of header when the
    # header line carries no trailing newline.
    my ($gene_name) = $header =~ />\s(.*?)(?:\s|\z)/;

    my $lower_count = 0;
    my ( @lower_entries, @upper_runs );

    # The alternation tells us which case the run has, so no smartmatch
    # against 'a'..'z' and no $& are needed.
    while ( $sequence =~ /([a-z]+)|([A-Z]+)/g ) {
        if ( defined $1 ) {
            push @lower_entries, 'I' . foo( ++$lower_count ) . $1;
        }
        else {
            push @upper_runs, $2;
        }
    }

    # One junction per pair of neighbouring upper-case runs. substr with a
    # negative offset yields the whole run when it is shorter than 10.
    my @junction_entries = map {
              'E'
            . foo( ( $_ + 1 ) . ( $_ + 2 ) )
            . substr( $upper_runs[$_], -10 )
            . substr( $upper_runs[ $_ + 1 ], 0, 10 )
    } 0 .. $#upper_runs - 1;

    say $gene_name // '';

    # pairwise pads the shorter list with undef; those print as empty lines.
    say( $_ // '' ) for pairwise { ( $a, $b ) } @lower_entries, @junction_entries;
    print "\n";

    return;
}
# Main driver: process every "*.doc" file in the current directory.
#
# The text of each document is extracted with Text::Extract::Word and cut
# into sections at blank (whitespace-only) lines. Every non-empty section is
# passed to handle_section(), whose output is collected in "<file>.out.txt".
for my $doc_path ( glob '*.doc' ) {

    # glob already limits the list to "*.doc". "next" replaces the original
    # "return", which is a runtime error outside a subroutine.
    next unless $doc_path =~ /\.doc/;

    my $extractor = Text::Extract::Word->new($doc_path);
    my $text      = $extractor->get_text() // '';

    my $out_path = "$doc_path.out.txt";
    open my $out, '>', $out_path
        or die "Cannot open '$out_path' for writing: $!";

    # handle_section() prints to the default handle, so make the output file
    # the default while this document is processed.
    my $previous_handle = select $out;

    # Split into lines (newlines kept) instead of reading from an in-memory
    # filehandle: such a handle cannot be opened on a string containing
    # characters above 0xFF, which extracted document text may well hold.
    my $section = '';
    for my $line ( split /^/, $text ) {
        if ( $line =~ /^\s*$/ ) {

            # A blank line ends the current section; runs of blank lines
            # must not produce empty sections.
            handle_section($section) if length $section;
            $section = '';
        }
        else {
            $section .= $line;
        }
    }

    # The text need not end with a blank line; without this flush the last
    # section of every document would be silently dropped.
    if ( length $section ) {
        $section .= "\n" unless $section =~ /\n\z/;
        handle_section($section);
    }

    select $previous_handle;
    close $out
        or die "Cannot close '$out_path': $!";
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment