Created
November 18, 2022 13:09
-
-
Save Martins6/bec501419aeb287c9bee6e43765c2cf9 to your computer and use it in GitHub Desktop.
Calculate the multinomial goodness of fit through Log Likelihood Ratio test statistic with the null distribution calculated via Monte Carlo simulation.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def multinomial_goodness_of_fit( | |
f_obs, | |
p_exp=None, | |
b=1000, | |
delta=0 | |
) -> list: | |
""" | |
Calculate the multinomial goodness of fit through Log Likelihood Ratio test statistic with the | |
null distribution calculated via Monte Carlo simulation. | |
Args: | |
f_obs (np.array): The observed frequency in each category. | |
p_exp (np.array): Defaults to None. The expected probability in each category. Our H0, in other words. | |
If None it give equal probability to every category. | |
b (int): Positive number. The number of times to generate the samples from the | |
multinomial and calculate our test statistic. | |
delta (float): Positive number to deal with categories that didn't occur. | |
Returns: | |
statistic (float): the observed choosen statistic. | |
p-value (float): the p-value of the observed statistic. | |
References: | |
https://www.biostat.wisc.edu/~kbroman/teaching/labstat/fourth/notes02.pdf | |
https://cran.r-project.org/web/packages/XNomial/vignettes/XNomial.html | |
https://en.wikipedia.org/wiki/G-test | |
https://en.wikipedia.org/wiki/Multinomial_test | |
""" | |
f_obs_delta = f_obs + delta | |
def llr_stat(f_obs, p_exp, p = False): | |
results = np.zeros(f_obs.shape) | |
for i, value in enumerate(f_obs): | |
if (f_obs[i] != 0) & (p_exp[i] > 0): | |
results[i] = 2 * (f_obs[i] * np.log( (f_obs[i]/f_obs.sum()) / p_exp[i])) | |
return results.sum() | |
rng = np.random.default_rng() | |
if p_exp is None: | |
p_exp = np.repeat(1/f_obs.size, f_obs.size) | |
mult_samples = rng.multinomial(f_obs.sum(), p_exp, size=b) + delta | |
test_statistic_simulation_result = np.zeros((b,)) | |
for i, f_exp_sample in enumerate(mult_samples): | |
statistic_sample = llr_stat(f_exp_sample, p_exp) | |
test_statistic_simulation_result[i] = statistic_sample | |
obs_statistics = llr_stat(f_obs_delta, p_exp) | |
p_value = (test_statistic_simulation_result >= obs_statistics).mean() | |
return obs_statistics, p_value, test_statistic_simulation_result | |
def multinomial_adjust(arr, c=100): | |
arr = arr * c | |
arr[arr == 0] = 1 | |
return arr |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment