FUNCTIONS FOR COMPUTING COHEN'S d AND PARTIAL ETA-SQUARED ALONG WITH THEIR CONFIDENCE INTERVALS, BY YOANN STUSSI
##########################################################################
#                                                                        #
#             FUNCTION FOR COMPUTING COHEN'S d & HEDGES' g               #
#                 ALONG WITH THEIR CONFIDENCE INTERVAL                   #
#            FOR INDEPENDENT, ONE-SAMPLE, OR PAIRED T TESTS              #
#                                                                        #
##########################################################################
# LAST UPDATED ON: 18.09.19 by YS
# Based on:
# Lakens, D. (2013). Calculating and reporting effect sizes to facilitate cumulative science: A practical primer for t-tests and ANOVAs. Frontiers in Psychology, 4, 863. https://doi.org/10.3389/fpsyg.2013.00863
# Lakens, D. (2015). The perfect t-test. Retrieved from https://github.com/Lakens/perfect-t-test. https://doi.org/10.5281/zenodo.17603
#---------------------------------------------------------------------------------------------------------------------------#
# INPUTS:
# x          = data vector in wide format for the first condition
# y          = data vector in wide format for the second condition [if missing -> one-sample t test]
# mu         = (for one-sample t tests) reference mean [default -> 0]
# paired     = TRUE -> paired t test, FALSE [default] -> independent-samples t test
# var.equal  = TRUE -> two variances treated as equal, FALSE [default] -> two variances treated as unequal (Welch's t test)
# conf.level = level of the confidence interval, between 0 and 1 [default -> .95]
#---------------------------------------------------------------------------------------------------------------------------#
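# OUTPUT: prints a summary table and invisibly returns a list with the effect size(s), Hedges' g,
#         and the confidence interval bounds (element names differ by test type; see below)
#---------------------------------------------------------------------------------------------------------------------------#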
cohen_d_ci <- function(x, y, mu, paired, var.equal, conf.level)
{
  #----INPUTS
  # y input
  if (missing(y)) y <- NA
  # mu input
  if (missing(mu)) mu <- 0
  # paired input
  if (missing(paired)) paired <- FALSE
  # var.equal input
  if (missing(var.equal)) var.equal <- FALSE
  # conf.level input
  if (missing(conf.level)) conf.level <- .95
  # flag the one-sample case (y not supplied)
  one.sample <- all(is.na(y))
  #---------------------------------------------------------------------------------------------------------#
  #----INDEPENDENT-SAMPLES T TEST
  if (!one.sample && !paired) {
    # output from t test
    ttest.output <- t.test(x = x, y = y, paired = FALSE, var.equal = var.equal, conf.level = conf.level)
    # effect size (Cohen's d_s)
    m.x    <- mean(x)
    m.y    <- mean(y)
    sd.x   <- sd(x)
    sd.y   <- sd(y)
    n.x    <- length(x)
    n.y    <- length(y)
    tvalue <- ttest.output$statistic
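    # Cohen's d_s standardizes the mean difference by the pooled standard deviation,
    # sqrt(((n.x - 1) * sd.x^2 + (n.y - 1) * sd.y^2) / (n.x + n.y - 2)); Hedges' g_s applies
    # the small-sample bias correction (1 - 3 / (4 * df - 1)) with df = n.x + n.y - 2 (Lakens, 2013)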
    d_s <- (m.x - m.y) / (sqrt(((n.x - 1) * sd.x^2 + (n.y - 1) * sd.y^2) / (n.x + n.y - 2)))
    g_s <- d_s * (1 - 3 / (4 * (n.x + n.y - 2) - 1))
    require(MBESS)
    # compute the confidence limits once and reuse them for both bounds
    smd_limits <- ci.smd(ncp = tvalue,
                         n.1 = n.x,
                         n.2 = n.y,
                         conf.level = conf.level)
    ci_lower_d_s <- smd_limits$Lower.Conf.Limit.smd
    ci_upper_d_s <- smd_limits$Upper.Conf.Limit.smd
    # SUMMARY TABLE
    table.effectsize <- matrix(c(d_s, g_s, ci_lower_d_s, ci_upper_d_s),
                               ncol = 4, byrow = FALSE)
    colnames(table.effectsize) <- c("Cohen's d_s", "Hedges' g_s",
                                    paste(conf.level * 100, "% CI lower bound"),
                                    paste(conf.level * 100, "% CI upper bound"))
    rownames(table.effectsize) <- "Effect size"
    print(table.effectsize)
    # OUTPUT
    output <- list(d_s = d_s, g_s = g_s, ci_lower_d_s = ci_lower_d_s, ci_upper_d_s = ci_upper_d_s)
    return(invisible(output))
  }
  #---------------------------------------------------------------------------------------------------------#
  #----PAIRED-SAMPLES T TEST
  if (!one.sample && paired) {
    # output from t test
    ttest.output <- t.test(x = x, y = y, paired = TRUE, conf.level = conf.level)
    # effect size
    diff    <- x - y
    m_diff  <- mean(diff)
    sd_diff <- sd(diff)
    sd.x    <- sd(x)
    sd.y    <- sd(y)
    N       <- length(x)
    r       <- cor(x, y)
    tvalue  <- ttest.output$statistic
    # Cohen's d_z
    d_z <- tvalue/sqrt(N)
    g_z <- d_z * (1 - 3/(4 * (N - 1) - 1))
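    # note: t = m_diff / (sd_diff / sqrt(N)), so d_z = t / sqrt(N) equals m_diff / sd_diff,
    # i.e., the mean difference standardized by the SD of the difference scores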
    require(MBESS)
    nct_limits <- conf.limits.nct(t.value = tvalue, df = N - 1, conf.level = conf.level)
    ci_lower_d_z <- nct_limits$Lower.Limit/sqrt(N)
    ci_upper_d_z <- nct_limits$Upper.Limit/sqrt(N)
    # Cohen's d_rm
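    # d_rm standardizes the mean difference by the difference-score SD rescaled by sqrt(2 * (1 - r)),
    # which accounts for the correlation between the repeated measures and makes d_rm comparable
    # to a between-subjects d (Lakens, 2013)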
    s_rm <- sqrt(sd.x^2 + sd.y^2 - 2 * r * sd.x * sd.y) / sqrt(2 * (1 - r))
    d_rm <- m_diff/s_rm
    g_rm <- d_rm * (1 - 3/(4 * (N - 1) - 1))
    ci_lower_d_rm <- nct_limits$Lower.Limit * sqrt((2 * (sd.x^2 + sd.y^2 - 2 * (r * sd.x * sd.y)))/(N * (sd.x^2 + sd.y^2)))
    ci_upper_d_rm <- nct_limits$Upper.Limit * sqrt((2 * (sd.x^2 + sd.y^2 - 2 * (r * sd.x * sd.y)))/(N * (sd.x^2 + sd.y^2)))
    # Cohen's d_av
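    # d_av uses the average of the two condition variances as the standardizer,
    # ignoring the correlation between the repeated measures (Lakens, 2013)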
    s_av <- sqrt((sd.x^2 + sd.y^2)/2)
    d_av <- m_diff/s_av
    g_av <- d_av * (1 - (3 / (4 * (N - 1) - 1)))
    ci_lower_d_av <- nct_limits$Lower.Limit * sd_diff/(s_av * sqrt(N))
    ci_upper_d_av <- nct_limits$Upper.Limit * sd_diff/(s_av * sqrt(N))
    # SUMMARY TABLE
    table.effectsize <- matrix(c(d_z, g_z, ci_lower_d_z, ci_upper_d_z,
                                 d_rm, g_rm, ci_lower_d_rm, ci_upper_d_rm,
                                 d_av, g_av, ci_lower_d_av, ci_upper_d_av),
                               ncol = 4, byrow = TRUE)
    colnames(table.effectsize) <- c("Cohen's d", "Hedges' g",
                                    paste(conf.level * 100, "% CI lower bound"),
                                    paste(conf.level * 100, "% CI upper bound"))
    rownames(table.effectsize) <- c("d_z", "d_rm", "d_av")
    print(table.effectsize)
    # OUTPUT
    output <- list(d_z = d_z, g_z = g_z, ci_lower_d_z = ci_lower_d_z, ci_upper_d_z = ci_upper_d_z,
                   d_rm = d_rm, g_rm = g_rm, ci_lower_d_rm = ci_lower_d_rm, ci_upper_d_rm = ci_upper_d_rm,
                   d_av = d_av, g_av = g_av, ci_lower_d_av = ci_lower_d_av, ci_upper_d_av = ci_upper_d_av)
    return(invisible(output))
  }
  #---------------------------------------------------------------------------------------------------------#
  #----ONE-SAMPLE T TEST
  if (one.sample) {
    # output from t test
    ttest.output <- t.test(x = x, mu = mu, conf.level = conf.level)
    # effect size
    diff    <- x - mu
    m_diff  <- mean(diff)
    sd_diff <- sd(diff)
    sd.x    <- sd(x)
    N       <- length(x)
    tvalue  <- ttest.output$statistic
    # Cohen's d_z
    d_z <- tvalue/sqrt(N)
    g_z <- d_z * (1 - 3/(4 * (N - 1) - 1))
    require(MBESS)
    nct_limits <- conf.limits.nct(t.value = tvalue, df = N - 1, conf.level = conf.level)
    ci_lower_d_z <- nct_limits$Lower.Limit/sqrt(N)
    ci_upper_d_z <- nct_limits$Upper.Limit/sqrt(N)
    # # ALTERNATIVE METHOD (gives the exact same results, since sd_diff == sd.x when mu is a constant)
    # d_z <- m_diff/sd.x
    # g_z <- d_z * (1 - (3 / (4 * (N - 1) - 1)))
    #
    # ci_lower_d_z <- nct_limits$Lower.Limit * sd_diff/(sd.x * sqrt(N))
    # ci_upper_d_z <- nct_limits$Upper.Limit * sd_diff/(sd.x * sqrt(N))
    # SUMMARY TABLE
    table.effectsize <- matrix(c(d_z, g_z, ci_lower_d_z, ci_upper_d_z),
                               ncol = 4, byrow = TRUE)
    colnames(table.effectsize) <- c("Cohen's d", "Hedges' g",
                                    paste(conf.level * 100, "% CI lower bound"),
                                    paste(conf.level * 100, "% CI upper bound"))
    rownames(table.effectsize) <- c("d_z")
    print(table.effectsize)
    # OUTPUT
    output <- list(d_z = d_z, g_z = g_z, ci_lower_d_z = ci_lower_d_z, ci_upper_d_z = ci_upper_d_z)
    return(invisible(output))
  }
}
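#---------------------------------------------------------------------------------------------------------------------------#
# USAGE EXAMPLE (illustrative sketch only, not part of the original file; data are simulated):
# set.seed(123)
# cond1 <- rnorm(30, mean = 110, sd = 15)
# cond2 <- rnorm(30, mean = 100, sd = 15)
# cohen_d_ci(x = cond1, y = cond2)                 # independent samples (Welch's t test by default)
# cohen_d_ci(x = cond1, y = cond2, paired = TRUE)  # paired samples
# cohen_d_ci(x = cond1, mu = 100)                  # one sample against mu = 100
#---------------------------------------------------------------------------------------------------------------------------#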
##########################################################################
#                                                                        #
#              FUNCTION FOR COMPUTING PARTIAL ETA-SQUARED                #
#                  ALONG WITH ITS CONFIDENCE INTERVAL                    #
#                 FOR ANALYSES OF VARIANCE (ANOVAs)                      #
#                                                                        #
##########################################################################
# FROM Yoann STUSSI, LAST UPDATED ON: 27.03.20 by David MUNOZ TORD
# Based on:
# Lakens, D. (2013). Calculating and reporting effect sizes to facilitate cumulative science: A practical primer for t-tests and ANOVAs.
#   Frontiers in Psychology, 4, 863. https://doi.org/10.3389/fpsyg.2013.00863
# http://daniellakens.blogspot.com/2014/06/calculating-confidence-intervals-for.html
#------------------------------------------------------------------------------------------------------------------------#
# ARGUMENTS:
# formula    = formula specifying the model using the aov format (e.g., DV ~ IVb * IVw1 * IVw2 + Error(id/(IVw1 * IVw2)))
# data       = a data frame in which the variables specified in the formula will be found
# conf.level = level of the confidence interval, between 0 and 1 [default -> .90]
# epsilon    = epsilon correction used to adjust for the lack of sphericity for factors involving repeated measures,
#              one of list("default", "none", "GG", "HF"):
#              - default: use Greenhouse-Geisser (GG) correction when epsilon GG < .75, Huynh-Feldt (HF) correction
#                when epsilon GG = [.75, .90], and no correction when epsilon GG >= .90 [default]
#              - none: use no sphericity correction
#              - GG: use Greenhouse-Geisser (GG) correction for all factors (>2 levels) involving repeated measures
#              - HF: use Huynh-Feldt (HF) correction for all factors (>2 levels) involving repeated measures
# anova.type = type of sum of squares used, in list("II", "III", 2, 3):
#              - "II" or 2: use type II sum of squares (hierarchical or partially sequential)
#              - "III" or 3: use type III sum of squares (marginal or orthogonal) [default]
# observed   = passed on to afex::aov_car; names of variables that are observed rather than manipulated [default -> NULL]
# factorize  = passed on to afex::aov_car; should the variables be factorized? [default -> afex_options("factorize")]
#------------------------------------------------------------------------------------------------------------------------#
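# OUTPUT: a matrix with one row per effect in the ANOVA table, giving the partial eta-squared
#         estimate and the lower and upper limits of its confidence interval
#------------------------------------------------------------------------------------------------------------------------#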
pes_ci <- function(formula, data, conf.level, epsilon, anova.type, observed = NULL, factorize = afex_options("factorize"))
{
  #----------------------------------------------------------------------------------------------#
  #----ARGUMENTS
  # conf.level
  if (missing(conf.level)) conf.level <- .90
  # epsilon
  if (missing(epsilon)) epsilon <- "default"
  if (!epsilon %in% c("default", "none", "GG", "HF")) {
    stop('Unknown correction for sphericity used. Please use (i) the default option ("default"; i.e., no correction applied when epsilon GG >= .90,
         GG correction applied when GG epsilon < .75, and HF correction applied when GG epsilon = [.75, .90]),
         (ii) no correction ("none"), (iii) Greenhouse-Geisser ("GG") correction, or (iv) Huynh-Feldt ("HF") correction.')
  }
  # anova.type
  if (missing(anova.type)) anova.type <- "III"
  #----------------------------------------------------------------------------------------------#
  #----COMPUTE ETA-SQUARED EFFECT SIZE ESTIMATES
  # afex package for computing eta-squared estimates
  require(afex)
  # compute anova with partial eta-squared (pes) estimates
  aov <- aov_car(formula = formula,
                 data = data,
                 anova_table = list(es = "pes"),
                 type = anova.type,
                 observed = observed,
                 factorize = factorize)
  aov.sum <- summary(aov)
  #----CREATE MATRIX TO STORE RESULTS
  matrix.es <- matrix(nrow = length(rownames(aov$anova_table)), ncol = 3)
  #----------------------------------------------------------------------------------------------#
  #----COMPUTE CONFIDENCE INTERVAL FOR EACH FACTOR
  # MBESS package for computing confidence intervals
  require(MBESS)
  for (i in 1:length(rownames(aov$anova_table))) {
    # CHECK whether sphericity corrections are available (i.e., the design includes within-subject factors)
    if (length(aov.sum$pval.adjustments) != 0) {
      #--------------------------------------------------------------------------------------------#
      # DEFAULT OPTION
      # (no correction if GG epsilon >= .90, GG correction if GG epsilon < .75, or HF correction if GG epsilon = [.75, .90])
      if (epsilon == "default") {
        # Search for factors with more than 2 levels
        for (j in 1:length(rownames(aov.sum$pval.adjustments))) {
          if (rownames(aov.sum$pval.adjustments)[j] == rownames(aov$anova_table)[i]) {
            # Apply GG correction if GG epsilon < .75
            # (df.1 and df.2 are assigned before each call and reused below to convert the limits to partial eta-squared)
            if (!is.na(aov.sum$pval.adjustments[j, "GG eps"]) && aov.sum$pval.adjustments[j, "GG eps"] < .75) {
              df.1 <- aov.sum$univariate.tests[i + 1, "num Df"] * aov.sum$pval.adjustments[j, "GG eps"]
              df.2 <- aov.sum$univariate.tests[i + 1, "den Df"] * aov.sum$pval.adjustments[j, "GG eps"]
              aov.sum.lim <- conf.limits.ncf(F.value = aov.sum$univariate.tests[i + 1, "F value"],
                                             conf.level = conf.level,
                                             df.1 = df.1, df.2 = df.2)
              break
            }
            # Apply HF correction if GG epsilon = [.75, .90]
            else if (!is.na(aov.sum$pval.adjustments[j, "GG eps"]) && aov.sum$pval.adjustments[j, "GG eps"] >= .75 && aov.sum$pval.adjustments[j, "GG eps"] < .90) {
              df.1 <- aov.sum$univariate.tests[i + 1, "num Df"] * aov.sum$pval.adjustments[j, "HF eps"]
              df.2 <- aov.sum$univariate.tests[i + 1, "den Df"] * aov.sum$pval.adjustments[j, "HF eps"]
              aov.sum.lim <- conf.limits.ncf(F.value = aov.sum$univariate.tests[i + 1, "F value"],
                                             conf.level = conf.level,
                                             df.1 = df.1, df.2 = df.2)
              break
            }
            # Apply no correction if GG epsilon >= .90
            else if (!is.na(aov.sum$pval.adjustments[j, "GG eps"]) && aov.sum$pval.adjustments[j, "GG eps"] >= .90) {
              df.1 <- aov.sum$univariate.tests[i + 1, "num Df"]
              df.2 <- aov.sum$univariate.tests[i + 1, "den Df"]
              aov.sum.lim <- conf.limits.ncf(F.value = aov.sum$univariate.tests[i + 1, "F value"],
                                             conf.level = conf.level,
                                             df.1 = df.1, df.2 = df.2)
              break
            }
          } else {
            df.1 <- aov.sum$univariate.tests[i + 1, "num Df"]
            df.2 <- aov.sum$univariate.tests[i + 1, "den Df"]
            aov.sum.lim <- conf.limits.ncf(F.value = aov.sum$univariate.tests[i + 1, "F value"],
                                           conf.level = conf.level,
                                           df.1 = df.1, df.2 = df.2)
          }
        }
      }
      #--------------------------------------------------------------------------------------------#
      # SPHERICITY ASSUMED
      if (epsilon == "none") {
        df.1 <- aov.sum$univariate.tests[i + 1, "num Df"]
        df.2 <- aov.sum$univariate.tests[i + 1, "den Df"]
        aov.sum.lim <- conf.limits.ncf(F.value = aov.sum$univariate.tests[i + 1, "F value"],
                                       conf.level = conf.level,
                                       df.1 = df.1, df.2 = df.2)
      }
      #--------------------------------------------------------------------------------------------#
      # GREENHOUSE-GEISSER (GG) CORRECTION
      else if (epsilon == "GG") {
        # Search for factors with more than 2 levels
        for (j in 1:length(rownames(aov.sum$pval.adjustments))) {
          if (!is.na(aov.sum$pval.adjustments[j, "GG eps"]) && rownames(aov.sum$pval.adjustments)[j] == rownames(aov$anova_table)[i]) {
            # Apply GG correction
            df.1 <- aov.sum$univariate.tests[i + 1, "num Df"] * aov.sum$pval.adjustments[j, "GG eps"]
            df.2 <- aov.sum$univariate.tests[i + 1, "den Df"] * aov.sum$pval.adjustments[j, "GG eps"]
            aov.sum.lim <- conf.limits.ncf(F.value = aov.sum$univariate.tests[i + 1, "F value"],
                                           conf.level = conf.level,
                                           df.1 = df.1, df.2 = df.2)
            break
          } else {
            df.1 <- aov.sum$univariate.tests[i + 1, "num Df"]
            df.2 <- aov.sum$univariate.tests[i + 1, "den Df"]
            aov.sum.lim <- conf.limits.ncf(F.value = aov.sum$univariate.tests[i + 1, "F value"],
                                           conf.level = conf.level,
                                           df.1 = df.1, df.2 = df.2)
          }
        }
      }
      #--------------------------------------------------------------------------------------------#
      # HUYNH-FELDT (HF) CORRECTION
      else if (epsilon == "HF") {
        # Search for factors with more than 2 levels
        for (j in 1:length(rownames(aov.sum$pval.adjustments))) {
          if (!is.na(aov.sum$pval.adjustments[j, "GG eps"]) && rownames(aov.sum$pval.adjustments)[j] == rownames(aov$anova_table)[i]) {
            # Apply HF correction
            df.1 <- aov.sum$univariate.tests[i + 1, "num Df"] * aov.sum$pval.adjustments[j, "HF eps"]
            df.2 <- aov.sum$univariate.tests[i + 1, "den Df"] * aov.sum$pval.adjustments[j, "HF eps"]
            aov.sum.lim <- conf.limits.ncf(F.value = aov.sum$univariate.tests[i + 1, "F value"],
                                           conf.level = conf.level,
                                           df.1 = df.1, df.2 = df.2)
            break
          } else {
            df.1 <- aov.sum$univariate.tests[i + 1, "num Df"]
            df.2 <- aov.sum$univariate.tests[i + 1, "den Df"]
            aov.sum.lim <- conf.limits.ncf(F.value = aov.sum$univariate.tests[i + 1, "F value"],
                                           conf.level = conf.level,
                                           df.1 = df.1, df.2 = df.2)
          }
        }
      }
    } else {
      # purely between-subjects design: no sphericity correction needed
      df.1 <- aov.sum$"num Df"[i]
      df.2 <- aov.sum$"den Df"[i]
      aov.sum.lim <- conf.limits.ncf(F.value = aov.sum$F[i], conf.level = conf.level,
                                     df.1 = df.1, df.2 = df.2)
    }
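    #--------------------------------------------------------------------------------------------#
    # Convert the noncentrality-parameter (ncp) limits into partial eta-squared limits using
    # eta^2_p = ncp / (ncp + df.1 + df.2 + 1) (see the Lakens blog post referenced above)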
    # LOWER LIMIT
    aov.sum.lower_lim <- ifelse(is.na(aov.sum.lim$Lower.Limit/(aov.sum.lim$Lower.Limit + df.1 + df.2 + 1)),
                                0,
                                aov.sum.lim$Lower.Limit/(aov.sum.lim$Lower.Limit + df.1 + df.2 + 1))
    # UPPER LIMIT
    aov.sum.upper_lim <- aov.sum.lim$Upper.Limit/(aov.sum.lim$Upper.Limit + df.1 + df.2 + 1)
    #--------------------------------------------------------------------------------------------#
    #----STORE RESULTS IN THE MATRIX
    matrix.es[i, ] <- matrix(c(aov$anova_table$pes[i],
                               aov.sum.lower_lim,
                               aov.sum.upper_lim),
                             ncol = 3)
  }
  #----------------------------------------------------------------------------------------------#
  #----OUTPUT MATRIX WITH PARTIAL ETA-SQUARED EFFECT SIZE ESTIMATES AND THEIR CONFIDENCE INTERVAL
  # Rename rows and columns
  rownames(matrix.es) <- rownames(aov$anova_table)
  colnames(matrix.es) <- c("Partial eta-squared",
                           paste(conf.level * 100, "% CI lower limit"),
                           paste(conf.level * 100, "% CI upper limit"))
  # Output
  return(matrix.es)
}
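#------------------------------------------------------------------------------------------------------------------------#
# USAGE EXAMPLE (illustrative sketch only, not part of the original file; 'mydata', 'DV', 'IVb', 'IVw',
# and 'id' are made-up names for a long-format data frame with one between- and one within-subject factor):
# pes_ci(DV ~ IVb * IVw + Error(id/IVw), data = mydata, conf.level = .90, epsilon = "default", anova.type = "III")
#------------------------------------------------------------------------------------------------------------------------#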
# Standard error of the mean
se <- function(x, na.rm = TRUE) {
  if (!is.vector(x)) stop("'x' must be a vector.")
  if (!is.numeric(x)) stop("'x' must be numeric.")
  if (na.rm) x <- x[stats::complete.cases(x)]
  sqrt(stats::var(x)/length(x))
} |
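# USAGE EXAMPLE (illustrative): se(c(1, 2, 3, 4)) returns sqrt(var(c(1, 2, 3, 4)) / 4) = 0.6454972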