Last active
October 26, 2018 09:08
-
-
Save holgerbrandl/3d88c11519340ce915f964d0845d4c91 to your computer and use it in GitHub Desktop.
Rewrites a big-wig file while adding a 'chr' prefix to the chromosome names. Requires `kscript`.Usage `add_chr_prefix_to_bw.kts chrom_sizes.fai in.bw out.bw`
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env kscript | |
// kshell_kts.sh /Users/brandl/.kscript/kscript_tmp_project__adjust_bigwig_ids.kts_1540471113973/src/adjust_bigwig_ids.kts | |
@file:DependsOn("org.jetbrains.bio:big:0.8.3") | |
@file:DependsOn("com.xenomachina:kotlin-argparser:2.0.7") | |
import com.xenomachina.argparser.ArgParser | |
import com.xenomachina.argparser.DefaultHelpFormatter | |
import com.xenomachina.argparser.mainBody | |
import org.apache.log4j.ConsoleAppender | |
import org.apache.log4j.Level | |
import org.apache.log4j.Logger | |
import org.apache.log4j.SimpleLayout | |
import org.jetbrains.bio.big.BedGraphSection | |
import org.jetbrains.bio.big.BigWigFile | |
import org.jetbrains.bio.big.FixedStepSection | |
import org.jetbrains.bio.big.VariableStepSection | |
import java.io.File | |
import java.nio.file.Files | |
import java.nio.file.Paths | |
// For details https://github.com/JetBrains-Research/big and in particular https://github.com/JetBrains-Research/big/issues/40 | |
class MyArgs(parser: ArgParser) { | |
val chromSizes by parser.positional("chrom_sizes") { File(this) } | |
val bwInput by parser.positional("bw_input") { File(this) } | |
val bwOutput by parser.positional("bw_output") { File(this) } | |
} | |
// see https://github.com/xenomachina/kotlin-argparser/issues/65 | |
val parsedArgs = mainBody { ArgParser(args, helpFormatter = DefaultHelpFormatter("")).parseInto(::MyArgs) } | |
//TODO also support species genome build names (mm10, hg19) instead along chrom_size file arguments | |
// E.g. /Volumes/plantx/planmine_v3/dd_Smes_v2/mine_export/dd_Smes_v2.pcf.contigs.fasta.fai /Users/brandl/Desktop/dd_Smes_v2.bw /Users/brandl/Desktop/dd_Smes_v2_new.bw | |
// E.g. download from http://hgdownload.soe.ucsc.edu/goldenPath/hg19/bigZips/hg19.chrom.sizes | |
val chrSizesPath = Paths.get(parsedArgs.chromSizes.toURI()) | |
val inputPath = Paths.get(parsedArgs.bwInput.toURI()) | |
val outputPath = Paths.get(parsedArgs.bwOutput.toURI()) | |
val LOG = Logger.getLogger("big_wig_id_converter") | |
// Optional logger | |
Logger.getRootLogger().apply { | |
addAppender(ConsoleAppender(SimpleLayout())) | |
level = Level.INFO | |
} | |
LOG.info("Loading chromosome sizes..") | |
val chromSizes = Files.readAllLines(chrSizesPath) | |
.asSequence() | |
.map { line -> | |
val chunks = line.split("\t", limit = 10) | |
// require(chunks.size == 2) | |
chunks[0] to chunks[1].toInt() | |
}.toList() | |
val chromosomes = chromSizes.asSequence().map { it.first } | |
LOG.info("Opening BigFile...") | |
BigWigFile.read(inputPath).use { bwInput -> | |
val compression = bwInput.compression | |
LOG.info("Loading Wig sections...") | |
val wigSections = bwInput.chromosomes | |
.valueCollection() | |
.flatMap { chr -> | |
require(chr in chromosomes) { | |
"Chromosome length isn't specified for chromosome: $chr" | |
} | |
bwInput.query(chr) | |
} | |
LOG.info(" Done") | |
LOG.info("Converting Wig sections...") | |
val updatedWigSections = wigSections.map { section -> | |
val fixedChrName = "chr${section.chrom}" | |
val e = when (section) { | |
is FixedStepSection -> section.copy(chrom = fixedChrName) | |
is VariableStepSection -> section.copy(chrom = fixedChrName) | |
is BedGraphSection -> section.copy(chrom = fixedChrName) | |
else -> error("Unknown section type: ${section.javaClass}") | |
} | |
e | |
} | |
LOG.info(" Done") | |
LOG.info("Writing Wig sections...") | |
BigWigFile.write(updatedWigSections, chromSizes, outputPath, compression = compression) | |
LOG.info(" Done") | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment