Created
January 27, 2020 22:34
-
-
Save KlausTrainer/26c2996cf32677e4c107bd8aaee67794 to your computer and use it in GitHub Desktop.
Converts a VCF SNP file from Dante Labs to the 23andme file format.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import vcf | |
""" | |
Converts a VCF SNP file from Dante Labs to the 23andme file format. | |
As the files from Dante Labs don't contain SNP IDs, they are mapped | |
via variant summary data extracted from dbSNP (see | |
https://genome.ucsc.edu/cgi-bin/hgTables). | |
The variant summary data file is expected to have three tab-separated | |
columns, i.e., chromosome, postion, and rsID. | |
""" | |
DB_SNP_FILE = '/home/klaus/Downloads/clinvar-snps.txt' | |
VCF_FILE = '/home/klaus/Downloads/60820188484212.filtered.snp.vcf' | |
snp_dict = {} | |
with open(DB_SNP_FILE, 'r') as snp_file: | |
for line in snp_file: | |
if line[0] == '#': | |
continue | |
(chromosome, position, rsid) = line.rstrip().split('\t') | |
match = re.match('^chr([0-9]+|[MXY])$', chromosome) | |
if not match: | |
continue | |
chromosome_number = match[1] | |
if chromosome_number == 'M': | |
chromosome_number = 'MT' | |
snp_dict[(chromosome_number, int(position))] = rsid | |
print('rsid\tchromosome\tposition\tallele1\tallele2') | |
for record in vcf.Reader(filename=VCF_FILE): | |
key = (record.CHROM, record.POS) | |
if key in snp_dict: | |
record.ID = snp_dict[key] | |
allel_count = record.INFO['AC'][0] if 'AC' in record.INFO else 1 | |
if len(record.ALT) == 2: | |
print('{}\t{}\t{}\t{}\t{}'.format(record.ID, record.CHROM, record.POS, record.ALT[0], record.ALT[1])) | |
elif allel_count == 2: | |
print('{}\t{}\t{}\t{}\t{}'.format(record.ID, record.CHROM, record.POS, record.ALT[0], record.ALT[0])) | |
else: | |
print('{}\t{}\t{}\t{}\t{}'.format(record.ID, record.CHROM, record.POS, record.ALT[0], record.REF)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment