Created
November 18, 2017 06:35
-
-
Save drorhilman/37b099b3928af474774446bdd01c0870 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def is_nuc(nuc):
    """Validate a genotype string of the form ``'X:Y'``.

    Args:
        nuc: Value to check. A valid value is a string made of exactly two
            ``':'``-separated parts, each a single letter from A, T, G, C.

    Returns:
        ``nuc`` unchanged when it is valid, otherwise ``np.nan``.
    """
    try:
        n1, n2 = nuc.split(':')
    except (AttributeError, TypeError, ValueError):
        # Not a str (e.g. a NaN float or None), or not exactly two parts.
        return np.nan

    # Tuple membership instead of substring search in 'ATGC', so that ''
    # and multi-letter fragments such as 'AT' are rejected.
    valid = ('A', 'T', 'G', 'C')
    # Both parts must be valid; the previous condition
    # ``not n1 in 'ATGC' and n2 in 'ATGC'`` only rejected the case where
    # n1 was invalid AND n2 was valid, because of operator precedence.
    if n1 in valid and n2 in valid:
        return nuc
    return np.nan
def nuc_combinations(nuc1, nuc2):
    """List the distinct pairings of one part of ``nuc1`` with one of ``nuc2``.

    Both inputs are first passed through ``is_nuc``. Each ``'X:Y'`` value is
    split on ``':'``; every pairing of one part from ``nuc1`` with one part
    from ``nuc2`` is sorted, re-joined with ``':'`` and de-duplicated.

    Args:
        nuc1: Genotype string of the first parent at one position.
        nuc2: Genotype string of the second parent at the same position.

    Returns:
        A sorted list of unique ``'X:Y'`` strings, or ``np.nan`` when either
        input cannot be split (``is_nuc`` returned ``np.nan`` for it).
    """
    nuc1, nuc2 = is_nuc(nuc1), is_nuc(nuc2)
    try:
        alleles1 = nuc1.split(':')
        alleles2 = nuc2.split(':')
    except (AttributeError, TypeError):
        # np.nan (a float) has no .split: at least one input was rejected.
        return np.nan

    pairs = {':'.join(sorted((a1, a2))) for a1 in alleles1 for a2 in alleles2}
    # Sort so the output order does not depend on string hash randomization.
    return sorted(pairs)
def cartesian_coord(*arrays):
    """Return every combination of one element from each input array.

    The inputs are expanded with ``np.meshgrid``; each resulting grid is
    flattened and the flattened grids become the columns of the result.

    Args:
        *arrays: One or more 1-D array-likes.

    Returns:
        A 2-D array with one row per combination and one column per input.
    """
    flattened = []
    for axis_grid in np.meshgrid(*arrays):
        flattened.append(axis_grid.ravel())
    stacked = np.vstack(flattened)
    return stacked.T
def cross(parent1, parent2):
    """Enumerate the combined outcomes of pairing two parents.

    ``parent1`` and ``parent2`` are walked in lockstep; for each pair of
    values ``nuc_combinations`` gives the possible results at that position,
    and ``cartesian_coord`` then combines the per-position possibilities.

    Args:
        parent1: Sequence of genotype values for the first parent.
        parent2: Sequence of genotype values for the second parent.

    Returns:
        Whatever ``cartesian_coord`` returns for the per-position lists.
    """
    per_position = list(map(nuc_combinations, parent1, parent2))
    return cartesian_coord(*per_position)
@mem.cache
def simulate_all_crosses(df, hybrid, parent_groups=('female', 'male')):
    """Simulate the offspring of every parent pairing for one hybrid.

    Rows of ``df`` whose ``'hybrid'`` column equals ``hybrid`` are split into
    two parent groups by their ``'type'`` column. Every pairing of a row
    from the first group with a row from the second is passed to ``cross``,
    and the de-duplicated results are returned as a DataFrame.

    Args:
        df: DataFrame with ``'hybrid'`` and ``'type'`` columns plus genotype
            columns, identified by having ``'snp'`` in their name.
        hybrid: Value of the ``'hybrid'`` column to select.
        parent_groups: Two ``'type'`` values naming the parent groups.

    Returns:
        DataFrame with one row per unique offspring, the snp columns, a
        ``'hybrid'`` column and a ``'status'`` column that is ``'hybrid'``
        when the two parent groups differ and ``'inbred'`` otherwise.
    """
    data = df[df['hybrid'] == hybrid]
    snps = [c for c in df.columns if 'snp' in c]
    parents1 = data[data['type'] == parent_groups[0]][snps].values
    parents2 = data[data['type'] == parent_groups[1]][snps].values

    offsprings = []
    # product() has no length, so give tqdm the total explicitly.
    n_pairs = len(parents1) * len(parents2)
    for comb in tqdm(product(parents1, parents2), total=n_pairs, leave=False):
        try:
            offsprings += cross(*comb).tolist()
        except Exception:
            # Best effort: report the failing pair and continue. Only
            # Exception is caught so Ctrl-C / SystemExit still propagate.
            print(comb)

    offsprings = pd.DataFrame(offsprings, columns=snps)
    offsprings = offsprings.drop_duplicates()
    offsprings['hybrid'] = hybrid
    if parent_groups[0] != parent_groups[1]:
        offsprings['status'] = 'hybrid'
    else:
        offsprings['status'] = 'inbred'
    # add the actual data
    return offsprings
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment