Created
May 18, 2022 09:17
-
-
Save yangyushi/74dc899897918a113f074b206cd0f77a to your computer and use it in GitHub Desktop.
toy script to simulate a naïve base calling process
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import matplotlib.pyplot as plt | |
from matplotlib import cm | |
def get_transfer_mat(N, p, q): | |
""" | |
Calculating the transition matrix P with shape (N, N). | |
P_ij: the transition probability of two states: | |
1) location `i` emitting to | |
2) location `j` emitting | |
Args: | |
N (int): the number of cycles | |
p (float): the probability of lagging | |
q (float): the probability of leading | |
Return: | |
np.ndarray: the transition matrix P | |
""" | |
P = np.empty((N, N)) | |
for u in range(N): | |
for v in range(N): | |
if u == v: | |
P[u, v] = p # lagging error, no advance | |
elif u+1 == v: | |
P[u, v] = 1 - p - q # normal process | |
elif u+2 == v: | |
P[u, v] = q # advance twice, leading error | |
else: | |
P[u, v] = 0 | |
return P | |
def get_phase_mat(trans_mat): | |
""" | |
Calculating the phase matrix Q with shape (N, N). | |
Q_ij: the probability of location `i` emitting light at cycle `j` | |
Args: | |
trans_mat (np.ndarray): the transition matrix | |
Return: | |
np.ndarray: the phase matrix Q | |
""" | |
N = len(trans_mat) | |
tmp = trans_mat.copy() | |
Q = np.empty((N, N)) | |
Q[0] = np.zeros(N) | |
Q[0, 0] = 1 | |
for t in range(N-1): | |
Q[t+1] = tmp[0] | |
tmp = trans_mat @ tmp | |
return Q | |
def get_fade_mat(N, tau_inv): | |
""" | |
Simulate fading matrix D as an exponential decay | |
Args: | |
N (int): the number of cycles. | |
tau_inv (float): the inverse of the relaxation time of the decay, | |
small value = slow decay | |
Return: | |
(np.ndarray): the fading matrix D, shape (N, N) | |
""" | |
D = [1] | |
for _ in range(N - 1): | |
D.append(np.exp(-tau_inv) * D[-1]) | |
return np.diag(D) | |
def get_seq_onehot(seq_indices): | |
""" | |
Get one-hot encoding from numerical sequences. | |
""" | |
N = len(seq_indices) | |
seq_arr = np.zeros((N, 4)) | |
for i, seq_idx in enumerate(seq_indices): | |
seq_arr[i][seq_idx] = 1 | |
return seq_arr | |
if __name__ == "__main__": | |
N = 50 | |
p, q, tau_inv, eps = 0.02, 0.01, 0.02, 0.005 | |
trans_mat = get_transfer_mat(N, p, q) | |
phase_mat = get_phase_mat(trans_mat) | |
fade_mat = get_fade_mat(N, tau_inv) | |
R = fade_mat @ phase_mat | |
plt.figure(figsize=(6, 5)) | |
data_str = f'$(p = {p:.2f}, q = {q:.2f})$' | |
plt.title(r"$ \mathbf{R}_\mathrm{T \times N} $" + data_str) | |
plt.imshow(R) | |
plt.ylabel("Cycle Number ($t$)") | |
plt.xlabel("Terminator Location ($n$)") | |
cb = plt.colorbar() | |
cb.set_label("Intensity (a.u.)") | |
plt.tight_layout() | |
plt.savefig('R.pdf') | |
plt.close() | |
corpus = 'ATCG' | |
seq_indices = np.random.choice(np.arange(4), size=N) | |
seq_str = "".join([corpus[i] for i in seq_indices]) | |
S = get_seq_onehot(seq_indices) | |
I = R @ S + np.random.normal(0, eps, size=(N, 4)) | |
S_hat = np.linalg.pinv(R) @ I | |
fig = plt.figure(figsize = (10, 7)) | |
ax0, ax1, ax2 = fig.subplots(3, 1, sharex=True) | |
pos = np.arange(N) | |
colors = ['olive', 'seagreen', 'crimson', 'royalblue'] | |
marksers = 'sopH' | |
for i, channel in enumerate(S.T): | |
ax0.plot(pos, channel, color=colors[i], marker=marksers[i], mfc='w', label=corpus[i]) | |
for i, channel in enumerate(I.T): | |
ax1.plot(pos, channel, color=colors[i], marker=marksers[i], mfc='w', label=corpus[i]) | |
for i, channel in enumerate(S_hat.T): | |
ax2.plot(pos, channel, color=colors[i], marker=marksers[i], mfc='w', label=corpus[i]) | |
for ax in (ax0, ax1, ax2): | |
ax.legend(ncol=4, handlelength=0.8, loc='lower left', columnspacing=1.0, fontsize=14) | |
ax.set_ylim(-0.25, 1.25) | |
ax.set_yticks([]) | |
ax.set_ylabel('Intensity (a.u.)') | |
ax0.set_title("Perfect Signal ($\mathbf{S}$)") | |
ax1.set_title("Observed Signal ($\mathbf{I} = \mathbf{R} \mathbf{S} + \mathbf{E}$)") | |
ax2.set_title("Naïvely Deconvolved Signal ($\hat{\mathbf{S}} = \mathbf{R}^{-1} \mathbf{I}$)") | |
ax2.set_xlabel("Cycle") | |
plt.tight_layout() | |
plt.savefig('signal.pdf') | |
plt.show() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The output