wangfan860 · May 13, 2019 15:55 · wangfan860 · May 7, 2019
diff --git a/maftools.r b/maftools.r
 # Install BiocManager only when it is missing. requireNamespace() tests for
 # the package without attaching it; it is only used through `::` below.
 if (!requireNamespace("BiocManager", quietly = TRUE)) {
   install.packages("BiocManager")
 }
 BiocManager::install("maftools")
 # Download rngtools from https://cran.r-project.org/src/contrib/Archive/rngtools/
 # and install the archived source tarball from the local path below.
 install.packages(
   "~/Desktop/nature_commu/rngtools_1.3.1.tar.gz",
   repos = NULL,
   type = "source"
 )
 library(maftools)
 # NOTE(review): both cohorts are read from the same placeholder path ".maf";
 # presumably each should point at its own MAF file - confirm before running.
 uvm <- read.maf(maf = ".maf")
 skcm <- read.maf(maf = ".maf")

 # Download CanFam3.1 from ftp://ftp.ensembl.org/pub/release-96/gtf/canis_familiaris/

 # Oncoplot of the 20 most frequently mutated genes in the SKCM cohort.
 oncoplot(maf = skcm, top = 20)
 ## canine maf file
 # Tab-separated mutation table; it must provide the 'Gene' and '#Chr' columns
 # used below.
 mutation = pd.read_csv('41467_2018_8081_MOESM5_ESM.txt', sep='\t')
 # Annotation records parsed from the CanFam3.1 GTF; only 'gene' features are kept.
 df1 = read_gtf("Canis_familiaris.CanFam3.1.96.gtf.gz")
 df_genes1 = df1.loc[df1["feature"] == "gene"]
 ref = df_genes1[['gene_id', 'gene_name']]
 # Drop genes with an empty name, then rename gene_id to 'Gene' so it matches
 # the join key of the mutation table.
 ref1 = ref.loc[ref['gene_name'] != '']
 ref1 = ref1.rename(columns={'gene_id': 'Gene'})
 # Inner join: keep only mutations whose gene has a named annotation entry.
 Merge = ref1.merge(mutation, on='Gene', how='inner')
 Merge['Chromosome'] = 'chr' + Merge['#Chr']

 def tidy_split(df, column, sep='|', keep=False):
     """
     Expand a delimited column into one row per delimited value.

     Rows whose `column` value is missing are dropped first. Every remaining
     row is repeated once per piece obtained by splitting the (stringified)
     value on `sep`; all other columns are carried over unchanged.

     Params
     ------
     df : pandas.DataFrame
         dataframe holding the column to split and expand
     column : str
         name of the column to split and expand
     sep : str
         literal separator passed to `str.split`
     keep : bool
         when True, a value that splits into more than one piece is also
         kept, unsplit, as its own row ahead of its pieces

     Returns
     -------
     pandas.DataFrame
         A new dataframe with the same columns as `df`.
     """
     present = df.dropna(subset=[column])
     row_positions = []
     pieces = []
     for position, text in enumerate(present[column].astype(str)):
         parts = text.split(sep)
         # Optionally emit the unsplit value before its pieces.
         expanded = [text] + parts if keep and len(parts) > 1 else parts
         row_positions.extend([position] * len(expanded))
         pieces.extend(expanded)
     # Positions refer to the NA-filtered frame, hence positional iloc.
     result = present.iloc[row_positions, :].copy()
     result[column] = pieces
     return result
 # Expand comma-separated Consequence annotations to one row per consequence.
 merge = tidy_split(Merge, 'Consequence', sep=',')

 # Consequence term -> MAF Variant_Classification label.
 # 'Splice_Site' follows the MAF spelling: maftools' default non-synonymous
 # classes (read.maf `vc_nonSyn`) list 'Splice_Site', so the previous
 # 'Splice_site' spelling was not recognised as such.
 # NOTE(review): 'Frame_Shift' is not a standard MAF label either
 # (Frame_Shift_Ins / Frame_Shift_Del); splitting it needs the indel direction
 # from Ref/Alt - confirm how indels are encoded in the input before changing.
 consequence_to_maf = {
     'missense_variant': 'Missense_Mutation',
     'stop_gained': 'Nonsense_Mutation',
     'start_lost': 'Nonsense_Mutation',
     'synonymous_variant': 'Silent',
     'inframe_insertion': 'In_Frame_Ins',
     'inframe_deletion': 'In_Frame_Del',
     'frameshift_variant': 'Frame_Shift',
     'splice_donor_variant': 'Splice_Site',
     'splice_acceptor_variant': 'Splice_Site',
     'splice_region_variant': 'Splice_Site',
 }
 # As before, matching values are replaced in every column of the frame.
 merge6 = merge.replace(consequence_to_maf)

 # Rename the input columns to their MAF names; index labels become strings.
 merge7 = merge6.rename(index=str, columns={
     "gene_name": "Hugo_Symbol",
     "Position": "Start_Position",
     "Consequence": "Variant_Classification",
     "Ref": "Reference_Allele",
     "Alt": "Tumor_Seq_Allele2",
     "Sample": "Tumor_Sample_Barcode",
 })
 # End position is set equal to the start position for every row.
 merge7['End_Position'] = merge7['Start_Position']
 # NOTE(review): every row is labelled SNP, including the in-frame and
 # frameshift indel classes mapped above - confirm this is intended.
 merge7['Variant_Type'] = 'SNP'
 # Drop rows whose IMPACT is LOW before writing the tab-separated MAF.
 merge8 = merge7[merge7.IMPACT != 'LOW']
 merge8.to_csv('canine_71cases.maf', sep='\t')


 copy = pd.read_csv('/Users/fanwang/Desktop/nature_commu/skcm/4b7a5729-b83e-4837-9b61-a6002dce1c0a/skcm_filtered.maf', sep='\t')

 # copy1 is a second name for the same DataFrame object, not a separate copy.
 copy1 = copy


 def _mutated_samples(maf, gene):
     """Return the unique Tumor_Sample_Barcode values that have at least one
     non-Silent record for `gene` (matched against Hugo_Symbol)."""
     hits = maf[(maf.Hugo_Symbol == gene) & (maf.Variant_Classification != 'Silent')]
     return list(hits.Tumor_Sample_Barcode.unique())


 # Report the number of mutated samples per gene. These counts used to be bare
 # expressions, which only display a value in an interactive session.
 # NOTE(review): 'RAS' is matched as a literal Hugo_Symbol; presumably a
 # specific RAS-family gene was meant - confirm or drop.
 for gene in ('BRAF', 'RAS', 'NF1', 'NRAS'):
     print(gene, len(_mutated_samples(copy1, gene)))

 braf_pt = _mutated_samples(copy1, 'BRAF')
 nf1_pt = _mutated_samples(copy1, 'NF1')
 nras_pt = _mutated_samples(copy1, 'NRAS')

 # Samples with none of the three genes mutated form the triple wild-type cohort.
 all_mutant = set(braf_pt + nras_pt + nf1_pt)
 negative = copy1[~copy1.Tumor_Sample_Barcode.isin(all_mutant)]
 negative.to_csv('triple_wt_cohort.maf', sep='\t')

 # Each mutant cohort keeps every record of its samples; a sample mutated in
 # more than one gene appears in more than one cohort file.
 braf = copy1[copy1.Tumor_Sample_Barcode.isin(braf_pt)]
 braf.to_csv('braf_mutant_cohort.maf', sep='\t')

 nras = copy1[copy1.Tumor_Sample_Barcode.isin(nras_pt)]
 nras.to_csv('nras_mutant_cohort.maf', sep='\t')

 nf1 = copy1[copy1.Tumor_Sample_Barcode.isin(nf1_pt)]
 nf1.to_csv('nf1_mutant_cohort.maf', sep='\t')
	# Install BiocManager only when it is missing. requireNamespace() tests for
	# the package without attaching it; it is only used through `::` below.
	if (!requireNamespace("BiocManager", quietly = TRUE)) {
	  install.packages("BiocManager")
	}
	BiocManager::install("maftools")
	# Download rngtools from https://cran.r-project.org/src/contrib/Archive/rngtools/
	# and install the archived source tarball from the local path below.
	install.packages(
	  "~/Desktop/nature_commu/rngtools_1.3.1.tar.gz",
	  repos = NULL,
	  type = "source"
	)
	library(maftools)
	# NOTE(review): both cohorts are read from the same placeholder path ".maf";
	# presumably each should point at its own MAF file - confirm before running.
	uvm <- read.maf(maf = ".maf")
	skcm <- read.maf(maf = ".maf")

	# Download CanFam3.1 from ftp://ftp.ensembl.org/pub/release-96/gtf/canis_familiaris/

	# Oncoplot of the 20 most frequently mutated genes in the SKCM cohort.
	oncoplot(maf = skcm, top = 20)
	## canine maf file
	# Tab-separated mutation table; it must provide the 'Gene' and '#Chr' columns
	# used below.
	mutation = pd.read_csv('41467_2018_8081_MOESM5_ESM.txt', sep='\t')
	# Annotation records parsed from the CanFam3.1 GTF; only 'gene' features are kept.
	df1 = read_gtf("Canis_familiaris.CanFam3.1.96.gtf.gz")
	df_genes1 = df1.loc[df1["feature"] == "gene"]
	ref = df_genes1[['gene_id', 'gene_name']]
	# Drop genes with an empty name, then rename gene_id to 'Gene' so it matches
	# the join key of the mutation table.
	ref1 = ref.loc[ref['gene_name'] != '']
	ref1 = ref1.rename(columns={'gene_id': 'Gene'})
	# Inner join: keep only mutations whose gene has a named annotation entry.
	Merge = ref1.merge(mutation, on='Gene', how='inner')
	Merge['Chromosome'] = 'chr' + Merge['#Chr']

	def tidy_split(df, column, sep='|', keep=False):
	    """
	    Split the values of a column and expand so the new DataFrame has one split
	    value per row. Filters rows where the column is missing.

	    Params
	    ------
	    df : pandas.DataFrame
	        dataframe with the column to split and expand
	    column : str
	        the column to split and expand
	    sep : str
	        the literal string used to split the column's values (passed to
	        `str.split`, so it is not a regular expression and needs no escaping)
	    keep : bool
	        whether to retain the presplit value as its own row

	    Returns
	    -------
	    pandas.DataFrame
	        Returns a dataframe with the same columns as `df`.
	    """
	    indexes = []
	    new_values = []
	    df = df.dropna(subset=[column])
	    for i, presplit in enumerate(df[column].astype(str)):
	        values = presplit.split(sep)
	        if keep and len(values) > 1:
	            # Emit the unsplit value ahead of its pieces.
	            indexes.append(i)
	            new_values.append(presplit)
	        for value in values:
	            indexes.append(i)
	            new_values.append(value)
	    # `i` counts positions in the NA-filtered frame, hence positional iloc.
	    new_df = df.iloc[indexes, :].copy()
	    new_df[column] = new_values
	    return new_df
	# Expand comma-separated Consequence annotations to one row per consequence.
	merge = tidy_split(Merge, 'Consequence', sep=',')

	# Consequence term -> MAF Variant_Classification label.
	# 'Splice_Site' follows the MAF spelling: maftools' default non-synonymous
	# classes (read.maf `vc_nonSyn`) list 'Splice_Site', so the previous
	# 'Splice_site' spelling was not recognised as such.
	# NOTE(review): 'Frame_Shift' is not a standard MAF label either
	# (Frame_Shift_Ins / Frame_Shift_Del); splitting it needs the indel direction
	# from Ref/Alt - confirm how indels are encoded in the input before changing.
	consequence_to_maf = {
	    'missense_variant': 'Missense_Mutation',
	    'stop_gained': 'Nonsense_Mutation',
	    'start_lost': 'Nonsense_Mutation',
	    'synonymous_variant': 'Silent',
	    'inframe_insertion': 'In_Frame_Ins',
	    'inframe_deletion': 'In_Frame_Del',
	    'frameshift_variant': 'Frame_Shift',
	    'splice_donor_variant': 'Splice_Site',
	    'splice_acceptor_variant': 'Splice_Site',
	    'splice_region_variant': 'Splice_Site',
	}
	# As before, matching values are replaced in every column of the frame.
	merge6 = merge.replace(consequence_to_maf)

	# Rename the input columns to their MAF names; index labels become strings.
	merge7 = merge6.rename(index=str, columns={
	    "gene_name": "Hugo_Symbol",
	    "Position": "Start_Position",
	    "Consequence": "Variant_Classification",
	    "Ref": "Reference_Allele",
	    "Alt": "Tumor_Seq_Allele2",
	    "Sample": "Tumor_Sample_Barcode",
	})
	# End position is set equal to the start position for every row.
	merge7['End_Position'] = merge7['Start_Position']
	# NOTE(review): every row is labelled SNP, including the in-frame and
	# frameshift indel classes mapped above - confirm this is intended.
	merge7['Variant_Type'] = 'SNP'
	# Drop rows whose IMPACT is LOW before writing the tab-separated MAF.
	merge8 = merge7[merge7.IMPACT != 'LOW']
	merge8.to_csv('canine_71cases.maf', sep='\t')


	copy = pd.read_csv('/Users/fanwang/Desktop/nature_commu/skcm/4b7a5729-b83e-4837-9b61-a6002dce1c0a/skcm_filtered.maf', sep='\t')

	# copy1 is a second name for the same DataFrame object, not a separate copy.
	copy1 = copy


	def _mutated_samples(maf, gene):
	    """Return the unique Tumor_Sample_Barcode values that have at least one
	    non-Silent record for `gene` (matched against Hugo_Symbol)."""
	    hits = maf[(maf.Hugo_Symbol == gene) & (maf.Variant_Classification != 'Silent')]
	    return list(hits.Tumor_Sample_Barcode.unique())


	# Report the number of mutated samples per gene. These counts used to be bare
	# expressions, which only display a value in an interactive session.
	# NOTE(review): 'RAS' is matched as a literal Hugo_Symbol; presumably a
	# specific RAS-family gene was meant - confirm or drop.
	for gene in ('BRAF', 'RAS', 'NF1', 'NRAS'):
	    print(gene, len(_mutated_samples(copy1, gene)))

	braf_pt = _mutated_samples(copy1, 'BRAF')
	nf1_pt = _mutated_samples(copy1, 'NF1')
	nras_pt = _mutated_samples(copy1, 'NRAS')

	# Samples with none of the three genes mutated form the triple wild-type cohort.
	all_mutant = set(braf_pt + nras_pt + nf1_pt)
	negative = copy1[~copy1.Tumor_Sample_Barcode.isin(all_mutant)]
	negative.to_csv('triple_wt_cohort.maf', sep='\t')

	# Each mutant cohort keeps every record of its samples; a sample mutated in
	# more than one gene appears in more than one cohort file.
	braf = copy1[copy1.Tumor_Sample_Barcode.isin(braf_pt)]
	braf.to_csv('braf_mutant_cohort.maf', sep='\t')

	nras = copy1[copy1.Tumor_Sample_Barcode.isin(nras_pt)]
	nras.to_csv('nras_mutant_cohort.maf', sep='\t')

	nf1 = copy1[copy1.Tumor_Sample_Barcode.isin(nf1_pt)]
	nf1.to_csv('nf1_mutant_cohort.maf', sep='\t')