Skip to content

Instantly share code, notes, and snippets.

@wangfan860
Last active May 13, 2019 15:55
Show Gist options
  • Save wangfan860/590d1a9d4ebcef59895bb730ed7c0efd to your computer and use it in GitHub Desktop.
Save wangfan860/590d1a9d4ebcef59895bb730ed7c0efd to your computer and use it in GitHub Desktop.
make canine and human maftools plot
# Install maftools (via Bioconductor), load the MAF files, and draw an
# oncoplot of the 20 most frequently mutated genes.

# requireNamespace() only checks that the package is installed; unlike
# require() it does not attach it, and nothing below needs it attached.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
# Skip the Bioconductor install when maftools is already available.
if (!requireNamespace("maftools", quietly = TRUE)) {
  BiocManager::install("maftools")
}

## download rngtools from https://cran.r-project.org/src/contrib/Archive/rngtools/
# Installs the archived 1.3.1 source tarball from a local path.
# NOTE(review): presumably pinned because the current rngtools release did not
# install on the R version in use -- confirm before keeping this step.
install.packages(
  "~/Desktop/nature_commu/rngtools_1.3.1.tar.gz",
  repos = NULL,
  type = "source"
)

library(maftools)

# NOTE(review): both calls read the same placeholder path ".maf"; point each
# one at its own MAF file before running.
uvm <- read.maf(maf = ".maf")
skcm <- read.maf(maf = ".maf")

# download CanFam3.1 from ftp://ftp.ensembl.org/pub/release-96/gtf/canis_familiaris/
# (the GTF is read by the read_gtf() step further down, not by this R code)
oncoplot(maf = skcm, top = 20)
## canine MAF file (the code from here on is Python/pandas, not R)
# Annotate the canine mutation table with gene names taken from the
# CanFam3.1 GTF, then add a 'chr'-prefixed Chromosome column.
import pandas as pd

# NOTE(review): read_gtf is neither defined nor imported anywhere in this
# file; presumably it is gtfparse.read_gtf (`from gtfparse import read_gtf`)
# -- confirm and add the import.

# Tab-separated mutation table; it must provide at least the 'Gene' and
# '#Chr' columns used below.
mutation = pd.read_csv('41467_2018_8081_MOESM5_ESM.txt', sep='\t')

# Keep only the gene-level records of the annotation.
df1 = read_gtf("Canis_familiaris.CanFam3.1.96.gtf.gz")
df_genes1 = df1[df1["feature"] == "gene"]

# gene_id -> gene_name lookup, dropping genes without a name. gene_id is
# renamed to 'Gene' so it lines up with the merge key of the mutation table.
ref = df_genes1[['gene_id', 'gene_name']]
ref1 = ref[ref.gene_name != ''].rename(columns={'gene_id': 'Gene'})

# Inner join: mutations whose gene has no named GTF entry are discarded.
Merge = pd.merge(ref1, mutation, on='Gene', how='inner')

# astype(str) keeps the concatenation valid when pandas parsed '#Chr' as a
# numeric column (plain `'chr' + <int column>` raises TypeError).
Merge['Chromosome'] = 'chr' + Merge['#Chr'].astype(str)
def tidy_split(df, column, sep='|', keep=False):
    """
    Split the values of a column and expand so the new DataFrame has one split
    value per row. Filters rows where the column is missing.

    Params
    ------
    df : pandas.DataFrame
        dataframe with the column to split and expand
    column : str
        the column to split and expand
    sep : str
        the string used to split the column's values
    keep : bool
        whether to retain the presplit value as its own row

    Returns
    -------
    pandas.DataFrame
        Returns a dataframe with the same columns as `df`. Rows produced
        from the same input row keep that row's index label, so the index
        of the result can contain duplicates.
    """
    # Positional row numbers (one per output row) and the value each gets.
    indexes = []
    new_values = []

    # Rows with a missing value in `column` cannot be split; drop them first
    # so the positions collected below refer to the filtered frame.
    df = df.dropna(subset=[column])

    for i, presplit in enumerate(df[column].astype(str)):
        values = presplit.split(sep)
        # With keep=True, a value that actually split into several parts is
        # also emitted once in its original, unsplit form.
        if keep and len(values) > 1:
            indexes.append(i)
            new_values.append(presplit)
        for value in values:
            indexes.append(i)
            new_values.append(value)

    # Repeat each source row once per collected value, then overwrite the
    # column with the individual values.
    new_df = df.iloc[indexes, :].copy()
    new_df[column] = new_values
    return new_df
# Convert the annotated canine mutation table into a MAF file readable by
# maftools.

# One row per (variant, consequence term): the table stores several terms
# in a single comma-separated 'Consequence' field.
merge = tidy_split(Merge, 'Consequence', sep=',')
# tidy_split repeats index labels for rows it expands; start from a clean
# positional index so the boolean masks below align unambiguously.
merge = merge.reset_index(drop=True)

# Consequence term -> MAF Variant_Classification.
# start_lost is deliberately grouped with stop_gained as Nonsense_Mutation.
# 'Splice_Site' uses the MAF spelling (the previous 'Splice_site' differs in
# case). NOTE(review): maftools is expected to match these names exactly --
# confirm against the read.maf documentation.
# Terms not listed here are passed through unchanged.
CONSEQUENCE_TO_MAF = {
    'missense_variant': 'Missense_Mutation',
    'stop_gained': 'Nonsense_Mutation',
    'start_lost': 'Nonsense_Mutation',
    'synonymous_variant': 'Silent',
    'inframe_insertion': 'In_Frame_Ins',
    'inframe_deletion': 'In_Frame_Del',
    'splice_donor_variant': 'Splice_Site',
    'splice_acceptor_variant': 'Splice_Site',
    'splice_region_variant': 'Splice_Site',
}
# Recode only the Consequence column; a frame-wide replace would also touch
# any other column that happened to contain one of these strings.
merge['Consequence'] = merge['Consequence'].replace(CONSEQUENCE_TO_MAF)

canine_maf = merge.rename(columns={
    "gene_name": "Hugo_Symbol",
    "Position": "Start_Position",
    "Consequence": "Variant_Classification",
    "Ref": "Reference_Allele",
    "Alt": "Tumor_Seq_Allele2",
    "Sample": "Tumor_Sample_Barcode",
})

# Allele lengths decide insertion vs. deletion. A lone '-' placeholder is
# counted as length 0.
# NOTE(review): assumes alleles are plain base strings (optionally '-' for
# an empty allele) -- confirm against the input table.
ref_len = canine_maf['Reference_Allele'].astype(str).replace('-', '').str.len()
alt_len = canine_maf['Tumor_Seq_Allele2'].astype(str).replace('-', '').str.len()
is_ins = alt_len > ref_len
is_del = alt_len < ref_len

# MAF has no generic 'Frame_Shift' class; it must be Frame_Shift_Ins or
# Frame_Shift_Del. Frameshifts whose alleles have equal length cannot be
# told apart here and fall back to Frame_Shift_Del.
is_frameshift = canine_maf['Variant_Classification'] == 'frameshift_variant'
canine_maf.loc[is_frameshift & is_ins, 'Variant_Classification'] = 'Frame_Shift_Ins'
canine_maf.loc[is_frameshift & ~is_ins, 'Variant_Classification'] = 'Frame_Shift_Del'

# NOTE(review): End_Position is set equal to Start_Position for every row,
# which is only exact for single-base variants -- confirm for indels.
canine_maf['End_Position'] = canine_maf['Start_Position']

# Equal-length alleles stay 'SNP' (the previous value for every row);
# length-changing variants are labelled INS / DEL.
canine_maf['Variant_Type'] = 'SNP'
canine_maf.loc[is_ins, 'Variant_Type'] = 'INS'
canine_maf.loc[is_del, 'Variant_Type'] = 'DEL'

# Drop variants annotated with IMPACT == 'LOW'.
canine_maf = canine_maf[canine_maf.IMPACT != 'LOW']

# index=False: the row index is not a MAF field and would otherwise be
# written as an extra unnamed first column.
canine_maf.to_csv('canine_71cases.maf', sep='\t', index=False)
# Split the SKCM MAF into BRAF-, NRAS- and NF1-mutant cohorts plus a
# "triple wild-type" cohort (samples mutated in none of the three genes),
# and write each cohort to its own MAF file.
SKCM_MAF_PATH = '/Users/fanwang/Desktop/nature_commu/skcm/4b7a5729-b83e-4837-9b61-a6002dce1c0a/skcm_filtered.maf'


def mutated_samples(maf, gene):
    """Return the unique Tumor_Sample_Barcode values that carry at least one
    non-'Silent' mutation in `gene` (matched against Hugo_Symbol)."""
    hits = maf[(maf.Hugo_Symbol == gene) & (maf.Variant_Classification != 'Silent')]
    return list(hits.Tumor_Sample_Barcode.unique())


skcm_maf = pd.read_csv(SKCM_MAF_PATH, sep='\t')

# Number of mutated samples per gene. Printed explicitly because a bare
# len(...) expression shows nothing outside an interactive session.
# NOTE(review): 'RAS' is kept from the original checks, but it does not look
# like a gene symbol that would appear in Hugo_Symbol (the family members are
# HRAS/KRAS/NRAS), so this count is probably always 0 -- confirm intent.
for gene in ('BRAF', 'RAS', 'NF1', 'NRAS'):
    print(gene, len(mutated_samples(skcm_maf, gene)))

braf_pt = mutated_samples(skcm_maf, 'BRAF')
nf1_pt = mutated_samples(skcm_maf, 'NF1')
nras_pt = mutated_samples(skcm_maf, 'NRAS')

# A sample can appear in more than one mutant cohort; the triple wild-type
# cohort is everything outside the union.
all_mutant = set(braf_pt) | set(nras_pt) | set(nf1_pt)

# index=False throughout: the row index is not a MAF field and would
# otherwise be written as an extra unnamed first column.
negative = skcm_maf[~skcm_maf.Tumor_Sample_Barcode.isin(all_mutant)]
negative.to_csv('triple_wt_cohort.maf', sep='\t', index=False)

braf = skcm_maf[skcm_maf.Tumor_Sample_Barcode.isin(braf_pt)]
braf.to_csv('braf_mutant_cohort.maf', sep='\t', index=False)

nras = skcm_maf[skcm_maf.Tumor_Sample_Barcode.isin(nras_pt)]
nras.to_csv('nras_mutant_cohort.maf', sep='\t', index=False)

nf1 = skcm_maf[skcm_maf.Tumor_Sample_Barcode.isin(nf1_pt)]
nf1.to_csv('nf1_mutant_cohort.maf', sep='\t', index=False)
@wangfan860
Copy link
Author

In this script, stop-gain (`stop_gained`) and start-loss (`start_lost`) variants are both recoded as `Nonsense_Mutation`.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment