Last active
February 26, 2016 01:36
-
-
Save ShujiaHuang/0ffc43e06b525282b0a3 to your computer and use it in GitHub Desktop.
用于判定窗口长度,完成窗口定位和窗口内甲基化率计算,常用于甲基化Canonical分析
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Author : Shujia Huang | |
#Date : 2010/11/27 | |
#!/usr/bin/perl -w | |
use strict; | |
use warnings; | |
my ( $file, $outfile_prefix, @bin_num ) = @ARGV; | |
my %region2num = ( "1000upstream" => 0, "first-exon" => 1, "intron" => 2, | |
"mid-exon" => 3, "last-exon" => 4, "1000downstream" => 5 ); | |
my %final_data; | |
my @inf; | |
open ( I, $file ) or die "Cannot open : $file\n"; | |
#1000upstream supercont2.12 + ureg_07933 1 737 0 67 CHH CAT 1.00 0 1 2 | |
#1000upstream supercont2.12 + ureg_07933 1 737 0 100 CHH CAC 1.00 0 1 2 | |
#1000upstream supercont2.12 + ureg_07933 1 737 0 102 CG CG 1.00 0 1 2 | |
while ( <I> ) { | |
chomp; | |
@inf = split ( /\s+/, $_ ); | |
$inf[0] = lc ( $inf[0] ); ##For any contingency | |
next if ( $inf[0] =~ /utr/i ); | |
my @bin_length = decided_bin_length ( $inf[4], $inf[5], $bin_num[ $region2num{$inf[0]} ] ); | |
my $win = decided_win ( $inf[4], $inf[5], $inf[7], \@bin_length); ## Window number counts from 0; | |
$win = ( $inf[2] eq '-') ? ( $bin_num[ $region2num{$inf[0]} ] - $win - 1 ) : $win; | |
my $region = $region2num{ $inf[0] }; | |
my $pattern = join "",( (split (//, $inf[-5]))[0,1] ); | |
$pattern = uc ($pattern); | |
$final_data{$pattern}{$inf[3]}{$region}[$win]->[0] += $inf[-3]; | |
$final_data{$pattern}{$inf[3]}{$region}[$win]->[1] += $inf[-2]; | |
$final_data{"all"}{$inf[3]}{$region}[$win]->[0] += $inf[-3]; | |
$final_data{"all"}{$inf[3]}{$region}[$win]->[1] += $inf[-2]; | |
} | |
close ( I ); | |
#### Display | |
my %num2region = reverse %region2num; | |
for my $pattern ( keys %final_data ) { | |
my @gene_list = keys %{ $final_data{$pattern} }; | |
my $site = 0; | |
open ( O, ">$outfile_prefix.$pattern" ) or die "Cannot write to file : $outfile_prefix.$pattern\n"; | |
print O join "\t",( "region", "Window", @gene_list );print O "\n"; | |
for my $region ( sort { $a<=> $b }keys %num2region ) { | |
for (my $i = 0; $i < $bin_num[$region]; ++$i ) { | |
++$site; | |
print O join "\t", ( $num2region{$region}, $site ); | |
for my $gene ( @gene_list ) { | |
if ( !exists $final_data{$pattern}{$gene}{$region}[$i] ){ | |
$final_data{$pattern}{$gene}{$region}[$i] = [0,0]; | |
} | |
my $met = $final_data{$pattern}{$gene}{$region}[$i]->[0]; | |
my $umet = $final_data{$pattern}{$gene}{$region}[$i]->[1]; | |
my $met_level = ( $met + $umet ) ? $met / ( $met + $umet ) : -1; | |
print O "\t$met_level"; | |
} | |
print O "\n"; | |
} | |
} | |
close ( O ); | |
} | |
################################# | |
######## Sub Function ########### | |
sub decided_bin_length { | |
my ( $start, $end, $bin_num ) = @_; | |
die "Region ERROR\n" if ( $end < $start ); | |
my $length = $end - $start; | |
my $length_per_bin = int ( $length / $bin_num ); | |
my $remainder = $length % $bin_num; | |
my @bin_length = ( $length_per_bin ) x $bin_num; | |
if ( $length < $bin_num ) { | |
@bin_length[0..($length-1)] = (1) x $length; | |
return @bin_length; ## Must return the array value now! Don't contiune or it'll get wrong result! | |
} | |
for ( my $i = 1; $i <= $remainder; ++$i ) { | |
$bin_length[$i - 1]++; | |
} | |
return @bin_length; | |
} | |
sub decided_win { | |
my ( $region_start, $region_end, $cout_site, $bin_length ) = @_; | |
my $delta = $cout_site - $region_start; | |
my $length = 0; | |
my $win; | |
my $i; | |
for ( $i = 0; $i < @$bin_length ; ++$i ) { | |
if ( $delta >= $length ) { | |
$length += $$bin_length[$i]; | |
} else { | |
$win = ( $i > 0 ) ? $i - 1 : 0 if ( $delta >= 0); | |
last; | |
} | |
} | |
if ( $cout_site <= $region_end ) { | |
if ( !defined $win ) { | |
$win = $i - 1; | |
} | |
} | |
die "Out of region\nRegion: $region_start - $region_end\nCout file site: $cout_site\n" if ( !defined $win); | |
return $win; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Usage:perl pure_data.pl inputfile output 50 10 20 10 10 50