sdhutchins · January 26, 2017 19:34 · sdhutchins · Jan 14, 2017
diff --git a/mygene_example.py b/mygene_example.py
 # -*- coding: utf-8 -*-
 """
 Created on Fri Jan 13 18:18:52 2017

 @author: sdhutchins

 """
 #------------------------------------------------------------------------------
 # Modules Used
 #------------------------------------------------------------------------------
 import mygene
 import csv
 import pandas as pd
 import sys

 # Instantiate the mygene.MyGeneInfo() object; its querymany() method is
 # called further down to look up the gene symbols.
 mg = mygene.MyGeneInfo()


 #------------------------------------------------------------------------------
 # Create a list of gene symbols/names for .csv file
 #------------------------------------------------------------------------------
 # Build the list of gene symbols from genes.csv (one csv row per gene).
 # NOTE(review): the list is seeded with an empty string; confirm whether the
 # query needs this entry before removing it.
 genes_list = ['']
 with open('genes.csv') as g:  # The with-block closes the file after reading
     file2 = csv.reader(g)
     for gene in file2:
         # Each row arrives as a list of fields. Flatten its text form into a
         # single token: strip the quotes and brackets and turn spaces into
         # underscores, so that ['ADRA1A'] becomes ADRA1A.
         genes = str(gene)
         genes = genes.replace("'", "")
         genes = genes.replace("[", "")
         genes = genes.replace("]", "")
         genes = genes.replace(" ", "_")
         genes_list.append(genes)
 print(genes_list)  # Show the list so the formatting can be checked by eye

 #------------------------------------------------------------------------------
 # Set up Input to start command if gene list is correct
 #------------------------------------------------------------------------------
 """
 x = str(input('Is the input properly formatted? (Type Yes or No) '))
 if x == 'Yes':
    print("\n" + "MyGene will start." + "\n")
 else:
     raise SystemExit
 """
 #------------------------------------------------------------------------------
 # Use MyGene to get gene information
 #------------------------------------------------------------------------------
 """
 Call querymany method.
 Scopes is the kind of identifier you are querying with; it can be "entrezgene", "symbol" such as HTR1A,
 "mim" for omim id, and "accession". For more, see: http://mygene.info/doc/query_service.html#available_fields
 Input is "symbol" in this example. Scroll to the bottom of this script for a list of genes I used.

 Setting as_dataframe to True will return a pandas dataframe object.
 Set verbose to False to suppress messages like "finished".
 The results will be a list of dictionaries.
 The dictionary contains the entrezid for the "entrezgene" field.
 If you want the ensembl ids, use fields='ensembl.gene'

 List of fields: http://mygene.info/metadata/fields
 Fields can be set to 'all' for all fields to return.

 There are also multiple species available or you can input the Taxonomy ID.

 Examples:
 entrez_ids = mg.querymany(genes_list, scopes='symbol,ensembl.gene', fields='entrezgene',
                          species='human', returnall=True, as_dataframe=True)

 ensembl_ids = mg.querymany(genes_list, scopes='symbol', fields='ensembl.gene',
                          species='9606', returnall=True, as_dataframe=True)
 """
 # Look up the symbol, name, Entrez ID and summary of every entry in
 # genes_list, restricted to human. The result is indexed by key later on;
 # its 'out' entry is the table that gets written to csv.
 query_options = {
     'scopes': 'symbol',
     'fields': 'symbol,name,entrezgene,summary',
     'species': 'human',
     'returnall': True,
     'as_dataframe': True,
     'size': 1,
 }
 basic_gene_info = mg.querymany(genes_list, **query_options)

 #------------------------------------------------------------------------------
 # Use pandas to turn results of the mygene queries into dataframes
 #------------------------------------------------------------------------------
 """
 Use dict.keys() or basic_info.keys() to find out what the data keys are.
 The data keys will be 'out' for output, 'missing' for any missing genes, 'dup' for any duplicates
 Write the dataframe to a csv file using pandas (it is saved as a dataframe).
 Save the data as a .csv file.
 Use df.drop to delete columns of the data you don't want.

 Additional dictionary command:
 To return a dictionary of MyGene.info metadata, use metadata = mg.metadata
 """
 # Write the 'out' table of the query result to csv, then read it back with
 # pandas so the rest of the clean-up works on the re-read table.
 csv_path = 'basic_gene_info.csv'
 basic_gene_info['out'].to_csv(csv_path, sep=',', encoding='utf-8')
 df = pd.read_csv(csv_path)

 # Remove the columns found at positions 1, 2 and 6 of the re-read table.
 unwanted_columns = df.columns[[1, 2, 6]]
 gene_info = df.drop(unwanted_columns, axis=1)

 # Rename the columns
 column_titles = {
     'query': 'Gene Symbol',
     'name': 'Gene Name',
     'entrezgene': 'Entrez ID',
     'summary': 'Gene Summary',
 }
 gene_info = gene_info.rename(columns=column_titles)

 # Overwrite the csv with the cleaned table, without the row index.
 gene_info.to_csv(csv_path, index=False)


 """
 List of Genes I used (I saved them to a csv file - genes.csv in this example)
 ADRA1A
 ADRA1B
 ADRA1D
 ADRA2A
 ADRA2B
 CHRM1
 CHRM2
 CHRM3
 CHRM5
 CNR1
 CNR2
 DRD2
 DRD3
 GABBR2
 HTR1A
 HTR1D
 HTR1F
 HTR2A
 HTR2B
 HTR4
 HTR5A
 HTR7
 OPRK1
 OPRM1
 """
	# -*- coding: utf-8 -*-
	"""
	Created on Fri Jan 13 18:18:52 2017

	@author: sdhutchins

	"""
	#------------------------------------------------------------------------------
	# Modules Used
	#------------------------------------------------------------------------------
	import mygene
	import csv
	import pandas as pd
	import sys

	# Instantiate the mygene.MyGeneInfo() object; its querymany() method is
	# called further down to look up the gene symbols.
	mg = mygene.MyGeneInfo()


	#------------------------------------------------------------------------------
	# Create a list of gene symbols/names for .csv file
	#------------------------------------------------------------------------------
	# Build the list of gene symbols from genes.csv (one csv row per gene).
	# NOTE(review): the list is seeded with an empty string; confirm whether the
	# query needs this entry before removing it.
	genes_list = ['']
	with open('genes.csv') as g:  # The with-block closes the file after reading
	    file2 = csv.reader(g)
	    for gene in file2:
	        # Each row arrives as a list of fields. Flatten its text form into
	        # a single token: strip the quotes and brackets and turn spaces
	        # into underscores, so that ['ADRA1A'] becomes ADRA1A.
	        genes = str(gene)
	        genes = genes.replace("'", "")
	        genes = genes.replace("[", "")
	        genes = genes.replace("]", "")
	        genes = genes.replace(" ", "_")
	        genes_list.append(genes)
	print(genes_list)  # Show the list so the formatting can be checked by eye

	#------------------------------------------------------------------------------
	# Set up Input to start command if gene list is correct
	#------------------------------------------------------------------------------
	"""
	x = str(input('Is the input properly formatted? (Type Yes or No) '))
	if x == 'Yes':
	    print("\n" + "MyGene will start." + "\n")
	else:
	    raise SystemExit
	"""
	#------------------------------------------------------------------------------
	# Use MyGene to get gene information
	#------------------------------------------------------------------------------
	"""
	Call querymany method.
	Scopes is the kind of identifier you are querying with; it can be "entrezgene", "symbol" such as HTR1A,
	"mim" for omim id, and "accession". For more, see: http://mygene.info/doc/query_service.html#available_fields
	Input is "symbol" in this example. Scroll to the bottom of this script for a list of genes I used.

	Setting as_dataframe to True will return a pandas dataframe object.
	Set verbose to False to suppress messages like "finished".
	The results will be a list of dictionaries.
	The dictionary contains the entrezid for the "entrezgene" field.
	If you want the ensembl ids, use fields='ensembl.gene'

	List of fields: http://mygene.info/metadata/fields
	Fields can be set to 'all' for all fields to return.

	There are also multiple species available or you can input the Taxonomy ID.

	Examples:
	entrez_ids = mg.querymany(genes_list, scopes='symbol,ensembl.gene', fields='entrezgene',
	species='human', returnall=True, as_dataframe=True)

	ensembl_ids = mg.querymany(genes_list, scopes='symbol', fields='ensembl.gene',
	species='9606', returnall=True, as_dataframe=True)
	"""
	# Look up the symbol, name, Entrez ID and summary of every entry in
	# genes_list, restricted to human. The result is indexed by key later on;
	# its 'out' entry is the table that gets written to csv.
	query_options = {
	    'scopes': 'symbol',
	    'fields': 'symbol,name,entrezgene,summary',
	    'species': 'human',
	    'returnall': True,
	    'as_dataframe': True,
	    'size': 1,
	}
	basic_gene_info = mg.querymany(genes_list, **query_options)

	#------------------------------------------------------------------------------
	# Use pandas to turn results of the mygene queries into dataframes
	#------------------------------------------------------------------------------
	"""
	Use dict.keys() or basic_info.keys() to find out what the data keys are.
	The data keys will be 'out' for output, 'missing' for any missing genes, 'dup' for any duplicates
	Write the dataframe to a csv file using pandas (it is saved as a dataframe).
	Save the data as a .csv file.
	Use df.drop to delete columns of the data you don't want.

	Additional dictionary command:
	To return a dictionary of MyGene.info metadata, use metadata = mg.metadata
	"""
	# Write the 'out' table of the query result to csv, then read it back with
	# pandas so the rest of the clean-up works on the re-read table.
	csv_path = 'basic_gene_info.csv'
	basic_gene_info['out'].to_csv(csv_path, sep=',', encoding='utf-8')
	df = pd.read_csv(csv_path)

	# Remove the columns found at positions 1, 2 and 6 of the re-read table.
	unwanted_columns = df.columns[[1, 2, 6]]
	gene_info = df.drop(unwanted_columns, axis=1)

	# Rename the columns
	column_titles = {
	    'query': 'Gene Symbol',
	    'name': 'Gene Name',
	    'entrezgene': 'Entrez ID',
	    'summary': 'Gene Summary',
	}
	gene_info = gene_info.rename(columns=column_titles)

	# Overwrite the csv with the cleaned table, without the row index.
	gene_info.to_csv(csv_path, index=False)


	"""
	List of Genes I used (I saved them to a csv file - genes.csv in this example)
	ADRA1A
	ADRA1B
	ADRA1D
	ADRA2A
	ADRA2B
	CHRM1
	CHRM2
	CHRM3
	CHRM5
	CNR1
	CNR2
	DRD2
	DRD3
	GABBR2
	HTR1A
	HTR1D
	HTR1F
	HTR2A
	HTR2B
	HTR4
	HTR5A
	HTR7
	OPRK1
	OPRM1
	"""