svigneau · August 29, 2015 13:56
diff --git a/tab3col_to_bedgraph.pl b/tab3col_to_bedgraph.pl
 #!/usr/bin/perl

 # Description: This script converts a 3 columns tabular format, where columns are chr, start, value, to bedGraph format. Input file may be compressed as .gz.
 # Coordinates in both input and bedGraph output are assumed to be 0-based (http://genome.ucsc.edu/goldenPath/help/bedgraph.html).

 # Usage: tab3col_to_bedgraph.pl --tab input.tsv --bedgraph output.bedgraph
 # --tab : specify input file in 3 columns tabular format, where columns are chr, start, value.
 # --bedgraph : specify output file in bedgraph format.

 # Credits: This script was written by Sebastien Vigneau ([email protected]) in Alexander Gimelbrant lab (Dana-Farber Cancer Institute).


 use strict;
 use warnings;
 use Getopt::Long;

 my $usage = "Usage: $0 --tab <infile.tsv> --bedgraph <outfile.bedgraph>";

 # Parse command line arguments

 my $infile; # 3 columns input file name
 my $outfile; # bedgraph output file name

 GetOptions (
  "tab=s" => \$infile,
  "bedgraph=s" => \$outfile,
 ) or die ("Error in command line arguments!\n$usage\n");

 # Open input file. If it is compressed with gunzip, uncompress it.

 if ($infile =~ /\.gz$/){
  open(IN,'-|',"gunzip -c $infile") || die "Could not open $infile: $!\n";
 } else {
  open(IN,'<',$infile) || die "Could not open $infile: $!\n";
 }

 # Open output file.

 open(OUT,'>',$outfile) || die "Could not open $outfile: $!\n";


 # Conversion to bedgraph starts here.

 # Declare variables.

 my $chr;
 my $start;
 my $end;
 my $val;
 my $step;

 my $prev_chr;
 my $prev_start;
 my $prev_end;
 my $prev_val;


 while (<IN>) {

  chomp;

  # Skip comment lines
  next if (/^#/);

  # Save previous line information 
  $prev_chr = $chr;
  $prev_start = $start;
  $prev_val = $val;

  # Parse relevant information in current line 
  # e.g: chr1 0 2
  ($chr, $start, $val) = split(/\t/);

  # Continue to next line if first line of file
  next if (! defined $prev_chr);

  # Update step size if current line belongs to same chromosome as previous line.
  # Otherwise, keep step unchanged.
  if ($chr eq $prev_chr) {
    $step = $start - $prev_start;
  }
 
  # Print information for previous line
  $prev_end = $prev_start + $step;
  print OUT "$prev_chr\t$prev_start\t$prev_end\t$prev_val\n";
 }

 # Print last line
 $prev_chr = $chr;
 $prev_start = $start;
 $prev_end = $prev_start + $step;
 $prev_val = $val;
 print OUT "$prev_chr\t$prev_start\t$prev_end\t$prev_val\n";

 close(IN);
 close(OUT);

 exit(0);
	#!/usr/bin/perl

	# Description: This script converts a 3 columns tabular format, where columns are chr, start, value, to bedGraph format. Input file may be compressed as .gz.
	# Coordinates in both input and bedGraph output are assumed to be 0-based (http://genome.ucsc.edu/goldenPath/help/bedgraph.html).

	# Usage: tab3col_to_bedgraph.pl --tab input.tsv --bedgraph output.bedgraph
	# --tab : specify input file in 3 columns tabular format, where columns are chr, start, value.
	# --bedgraph : specify output file in bedgraph format.

	# Credits: This script was written by Sebastien Vigneau ([email protected]) in Alexander Gimelbrant lab (Dana-Farber Cancer Institute).


	use strict;
	use warnings;
	use Getopt::Long;

	my $usage = "Usage: $0 --tab <infile.tsv> --bedgraph <outfile.bedgraph>";

	# Parse command line arguments

	my $infile; # 3 columns input file name
	my $outfile; # bedgraph output file name

	GetOptions (
	"tab=s" => \$infile,
	"bedgraph=s" => \$outfile,
	) or die ("Error in command line arguments!\n$usage\n");

	# Open input file. If it is compressed with gunzip, uncompress it.

	if ($infile =~ /\.gz$/){
	open(IN,'-\|',"gunzip -c $infile") \|\| die "Could not open $infile: $!\n";
	} else {
	open(IN,'<',$infile) \|\| die "Could not open $infile: $!\n";
	}

	# Open output file.

	open(OUT,'>',$outfile) \|\| die "Could not open $outfile: $!\n";


	# Conversion to bedgraph starts here.

	# Declare variables.

	my $chr;
	my $start;
	my $end;
	my $val;
	my $step;

	my $prev_chr;
	my $prev_start;
	my $prev_end;
	my $prev_val;


	while (<IN>) {

	chomp;

	# Skip comment lines
	next if (/^#/);

	# Save previous line information
	$prev_chr = $chr;
	$prev_start = $start;
	$prev_val = $val;

	# Parse relevant information in current line
	# e.g: chr1 0 2
	($chr, $start, $val) = split(/\t/);

	# Continue to next line if first line of file
	next if (! defined $prev_chr);

	# Update step size if current line belongs to same chromosome as previous line.
	# Otherwise, keep step unchanged.
	if ($chr eq $prev_chr) {
	$step = $start - $prev_start;
	}

	# Print information for previous line
	$prev_end = $prev_start + $step;
	print OUT "$prev_chr\t$prev_start\t$prev_end\t$prev_val\n";
	}

	# Print last line
	$prev_chr = $chr;
	$prev_start = $start;
	$prev_end = $prev_start + $step;
	$prev_val = $val;
	print OUT "$prev_chr\t$prev_start\t$prev_end\t$prev_val\n";

	close(IN);
	close(OUT);

	exit(0);