# Examples of diagnostics for convergence failures using data from Choe, Yoshida, & Cole (2022)
library(arrow)
CYC_2022 <- read_feather("https://raw.githubusercontent.com/yjunechoe/Semantic-Persistence/refs/heads/master/CYC_2022.arrow")
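# A quick peek at the columns used in the model below (names taken from the
# model formula), just to check that everything read in as expected:
str(CYC_2022[c("Accuracy", "Condition", "SemanticFit", "Transitivity", "Item", "Subject")])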
# Diagnosing warnings of type: "Model failed to converge with max|grad| ..."
library(lme4)
#> Loading required package: Matrix
fm <- Accuracy ~ Condition * SemanticFit * Transitivity +
  (1 | Item) +
  (1 + Condition | Subject)
mod1 <- glmer(fm, CYC_2022, binomial())
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> Model failed to converge with max|grad| = 0.00424328 (tol = 0.002, component 1)
mod1@optinfo$optimizer
#> [1] "Nelder_Mead"
mod1@optinfo$feval
#> [1] 1741
mod1@optinfo$conv
#> $opt
#> [1] 0
#>
#> $lme4
#> $lme4$code
#> [1] -1
#>
#> $lme4$messages
#> [1] "Model failed to converge with max|grad| = 0.00424328 (tol = 0.002, component 1)"
-2 * as.numeric(logLik(mod1))
#> [1] 1227.308
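# Following the example in `?lme4::convergence`, we can recompute the scaled
# gradient at the fitted values ourselves; this is closely related to the
# quantity that the max|grad| check above evaluates:
derivs1 <- mod1@optinfo$derivs
sc_grad1 <- with(derivs1, solve(Hessian, gradient))
max(abs(sc_grad1))
max(pmin(abs(sc_grad1), abs(derivs1$gradient)))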
lobstr::tree(glmerControl())
#> S3<glmerControl/merControl>
#> ├─optimizer<chr [2]>: "bobyqa", "Nelder_Mead"
#> ├─restart_edge: FALSE
#> ├─boundary.tol: 1e-05
#> ├─calc.derivs: TRUE
#> ├─use.last.params: FALSE
#> ├─checkControl: <list>
#> │ ├─check.nobs.vs.rankZ: "ignore"
#> │ ├─check.nobs.vs.nlev: "stop"
#> │ ├─check.nlev.gtreq.5: "ignore"
#> │ ├─check.nlev.gtr.1: "stop"
#> │ ├─check.nobs.vs.nRE: "stop"
#> │ ├─check.rankX: "message+drop.cols"
#> │ ├─check.scaleX: "warning"
#> │ ├─check.formula.LHS: "stop"
#> │ └─check.response.not.const: "stop"
#> ├─checkConv: <list>
#> │ ├─check.conv.grad: <list>
#> │ │ ├─action: "warning"
#> │ │ ├─tol: 0.002
#> │ │ └─relTol: <NULL>
#> │ ├─check.conv.singular: <list>
#> │ │ ├─action: "message"
#> │ │ └─tol: 1e-04
#> │ └─check.conv.hess: <list>
#> │   ├─action: "warning"
#> │   └─tol: 1e-06
#> ├─optCtrl: <list>
#> ├─tolPwrss: 1e-07
#> ├─compDev: TRUE
#> └─nAGQ0initStep: TRUE
# "Turn it off" approach:
## 1) Use a more lenient tolerance for the gradient check
mod2 <- glmer(
  fm, CYC_2022, binomial(),
  control = glmerControl(check.conv.grad = .makeCC("warning", tol = 0.005, relTol = NULL))
)
mod2@optinfo$feval
#> [1] 1741
-2 * as.numeric(logLik(mod2))
#> [1] 1227.308
## 2) Turn the derivative-based checks off entirely
mod3 <- glmer(
  fm, CYC_2022, binomial(),
  control = glmerControl(calc.derivs = FALSE)
)
mod3@optinfo$feval
#> [1] 1741
-2 * as.numeric(logLik(mod3))
#> [1] 1227.308
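# Note that mod1, mod2, and mod3 all take the same 1741 function evaluations:
# the fit itself is unchanged and only the post-fit check differs, so the
# estimates should be identical. A quick check of that:
all.equal(c(mod1@beta, mod1@theta), c(mod2@beta, mod2@theta))
all.equal(c(mod1@beta, mod1@theta), c(mod3@beta, mod3@theta))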
# "Do better" approach:
## 1) Use stricter precision: (see `?convergence`)
mod4 <- glmer(
  fm, CYC_2022, binomial(),
  control = glmerControl(
    optimizer = "Nelder_Mead",
    optCtrl = list(
      # the default Nelder-Mead tolerances, each tightened by a factor of 1e3;
      # this improves max|grad| only slightly
      FtolAbs = 1e-8,
      FtolRel = 1e-18,
      XtolRel = 1e-10
    )
  )
)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> Model failed to converge with max|grad| = 0.00328883 (tol = 0.002, component 1)
mod4@optinfo$feval
#> [1] 1662
mod4@optinfo$conv
#> $opt
#> [1] 0
#>
#> $lme4
#> $lme4$code
#> [1] -1
#>
#> $lme4$messages
#> [1] "Model failed to converge with max|grad| = 0.00328883 (tol = 0.002, component 1)"
-2 * as.numeric(logLik(mod4))
#> [1] 1227.308
## 2) Switch optimizer: bobyqa
mod5 <- glmer(
  fm, CYC_2022, binomial(),
  control = glmerControl(
    optimizer = "bobyqa",
    # optCtrl = list(rhobeg = 2e-1, rhoend = 2e-3) # to force failure
  )
)
mod5@optinfo$optimizer
#> [1] "bobyqa"
mod5@optinfo$feval
#> [1] 427
-2 * as.numeric(logLik(mod5))
#> [1] 1227.308
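# Same scaled-gradient diagnostic as before, now for the bobyqa fit
# (this is the quantity the now-silent convergence check looked at):
derivs5 <- mod5@optinfo$derivs
max(abs(with(derivs5, solve(Hessian, gradient))))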
## 3) Supply better starting values (back on the default Nelder-Mead)
mod6 <- glmer(
  fm, CYC_2022, binomial(),
  start = list(
    theta = mod5@theta,
    fixef = mod5@beta # the fixef component of start is only available for glmer
  )
)
mod6@optinfo$optimizer
#> [1] "Nelder_Mead"
mod6@optinfo$feval
#> [1] 620
-2 * as.numeric(logLik(mod6))
#> [1] 1227.308
c(mod6@beta, mod6@theta) - c(mod5@beta, mod5@theta)
#> [1] -6.274439e-06 7.429766e-06 -8.882559e-06 -1.194653e-05 -8.731270e-07 2.273223e-06
#> [7] -2.006810e-05 1.295990e-05 -2.667987e-05 2.188977e-05 4.519242e-06 8.099356e-08
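# An extra check worth knowing about: `lme4::allFit()` refits the model with
# every available optimizer (some require the optional optimx/dfoptim
# packages and are skipped otherwise). If all optimizers agree to several
# decimal places, the original warning was likely a false positive.
af <- allFit(mod1)
ss <- summary(af)
ss$which.OK # optimizers that completed without an error
ss$llik     # log-likelihoods across optimizers
ss$fixef    # fixed-effect estimates across optimizers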
# Misc: reading `verbose = TRUE` output
mod_debug <- glmer(
  fm, CYC_2022, binomial(),
  control = glmerControl(optimizer = "bobyqa"),
  verbose = TRUE # prints `par` as c(theta, beta)
)
#> start par. = 1 0 1 1 fn = 1248.258
#> At return
#> eval: 92 fn: 1227.8373 par: 1.10537 -0.332073 0.303904 0.535802
#> start par. = 1.105373 -0.3320734 0.3039045 0.5358022 1.81857 -0.4116346 -0.3577637 -0.3466247 -0.170979 -0.002003776 -0.4427596 0.05333196 fn = 1227.823
#> At return
#> eval: 427 fn: 1227.3079 par: 1.11018 -0.342661 0.316524 0.534006 1.96113 -0.469911 -0.379738 -0.368217 -0.176009 0.000181727 -0.467381 0.0570098
# For comparison, the estimates from the Nelder-Mead fit (mod3) are nearly
# identical to the final `par` values above:
mod3@theta
#> [1] 1.1101565 -0.3425725 0.3158877 0.5339909
mod3@beta
#> [1] 1.9613926942 -0.4697956567 -0.3798052310 -0.3685430928 -0.1760139897
#> [6] 0.0003730676 -0.4677616340 0.0575529287
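# Reading the output: the first verbose block is the nAGQ0 init step, which
# optimizes only the covariance parameters; the second block optimizes the
# full 12-element c(theta, beta) vector.
length(mod_debug@theta) # the 4 covariance parameters come first in `par`
length(mod_debug@beta)  # followed by the 8 fixed effects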
# Misc: descriptions of optimizer options (see also `?convergence`)
unname(unstack(nloptr::nloptr.get.default.options()[, c("description", "name")]))
#>
#> algorithm This option is required. Check the NLopt website for a description of the algorithms.
#> check_derivatives The option check_derivatives can be activated to compare the user-supplied analytic gradients with finite difference approximations.
#> check_derivatives_print The option check_derivatives_print controls the output of the derivative checker (if check_derivatives==TRUE). All comparisons are shown ('all'), only those comparisons that resulted in an error ('error'), or only the number of errors is shown ('none').
#> check_derivatives_tol The option check_derivatives_tol determines when a difference between an analytic gradient and its finite difference approximation is flagged as an error.
#> ftol_abs Stop when an optimization step (or an estimate of the optimum) changes the function value by less than ftol_abs. Criterion is disabled if ftol_abs is non-positive (default).
#> ftol_rel Stop when an optimization step (or an estimate of the optimum) changes the objective function value by less than ftol_rel multiplied by the absolute value of the function value. If there is any chance that your optimum function value is close to zero, you might want to set an absolute tolerance with ftol_abs as well. Criterion is disabled if ftol_rel is non-positive (default).
#> maxeval Stop when the number of function evaluations exceeds maxeval. This is not a strict maximum: the number of function evaluations may exceed maxeval slightly, depending upon the algorithm. Criterion is disabled if maxeval is non-positive.
#> maxtime Stop when the optimization time (in seconds) exceeds maxtime. This is not a strict maximum: the time may exceed maxtime slightly, depending upon the algorithm and on how slow your function evaluation is. Criterion is disabled if maxtime is non-positive (default).
#> population Several of the stochastic search algorithms (e.g., CRS, MLSL, and ISRES) start by generating some initial population of random points x. By default, this initial population size is chosen heuristically in some algorithm-specific way, but the initial population can be changed by setting a positive integer value for population. A population of zero implies that the heuristic default will be used.
#> print_level The option print_level controls how much output is shown during the optimization process. Possible values: 0 (default): no output;\n1: show iteration number and value of objective function; 2: 1 + show value of (in)equalities; 3: 2 + show value of controls.
#> print_options_doc If TRUE, a description of all options and their current and default values is printed to the screen.
#> ranseed For stochastic optimization algorithms, pseudorandom numbers are generated. Set the random seed using ranseed if you want to use a 'deterministic' sequence of pseudorandom numbers, i.e. the same sequence from run to run. If ranseed is 0 (default), the seed for the random numbers is generated from the system time, so that you will get a different sequence of pseudorandom numbers each time you run your program.
#> stopval Stop minimization when an objective value <= stopval is found. Setting stopval to -Inf disables this stopping criterion (default).
#> tol_constraints_eq The parameter tol_constraints_eq is a vector of tolerances. Each tolerance corresponds to one of the equality constraints. The tolerance is used for the purpose of stopping criteria only: a point x is considered feasible for judging whether to stop the optimization if abs( eval_g_ineq(x) ) <= tol. For equality constraints, a small positive tolerance is strongly advised in order to allow NLopt to converge even if the equality constraint is slightly nonzero. By default the tolerances for all equality constraints are set to 1e-8.
#> tol_constraints_ineq The parameter tol_constraints_ineq is a vector of tolerances. Each tolerance corresponds to one of the inequality constraints. The tolerance is used for the purpose of stopping criteria only: a point x is considered feasible for judging whether to stop the optimization if eval_g_ineq(x) <= tol. A tolerance of zero means that NLopt will try not to consider any x to be converged unless eval_g_ineq(x) is strictly non-positive; generally, at least a small positive tolerance is advisable to reduce sensitivity to rounding errors. By default the tolerances for all inequality constraints are set to 1e-8.
#> xtol_abs xtol_abs is a vector of length n (the number of elements in x) giving the tolerances: stop when an optimization step (or an estimate of the optimum) changes every parameter x[i] by less than xtol_abs[i]. Criterion is disabled if all elements of xtol_abs are non-positive (default).
#> xtol_rel
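# These nloptr stopping criteria can be used with lme4 via the built-in
# "nloptwrap" optimizer wrapper. A minimal sketch (the tolerance values here
# are illustrative, not recommendations):
mod_nlopt <- glmer(
  fm, CYC_2022, binomial(),
  control = glmerControl(
    optimizer = "nloptwrap",
    optCtrl = list(ftol_abs = 1e-8, xtol_abs = 1e-8)
  )
)
mod_nlopt@optinfo$optimizer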
# Misc: Julia goes brrr
library(jlme)
jlme_setup()
jmod <- jlmer(fm, CYC_2022, binomial())
jmod$objective
#> [1] 1227.294
jmod$optsum
#> <Julia object of type OptSummary{Float64}>
#> Initial parameter vector: [1.5566337764098648, -0.2983581113766062, -0.2987076608434313, -0.290090201453699, -0.17432777433340768, -0.02792005950057892, -0.3334051584697911, -0.017249002400788725, 1.0, 0.0, 1.0, 1.0]
#> Initial objective value: 1251.3975422976225
#>
#> Optimizer (from NLopt): LN_BOBYQA
#> Lower bounds: [-Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, 0.0, -Inf, 0.0, 0.0]
#> ftol_rel: 1.0e-12
#> ftol_abs: 1.0e-8
#> xtol_rel: 0.0
#> xtol_abs: [1.0e-10, 1.0e-10, 1.0e-10, 1.0e-10]
#> initial_step: [1.5566337764098648, -0.2983581113766062, -0.2987076608434313, -0.290090201453699, -0.17432777433340768, -0.02792005950057892, -0.3334051584697911, -0.017249002400788725, 0.75, 1.0, 0.75, 0.75]
#> maxfeval: -1
#> maxtime: -1.0
#>
#> Function evaluations: 1077
#> Final parameter vector: [1.9665793095182482, -0.47436556264485136, -0.3811626648769408, -0.36979812333674433, -0.1755519521595563, 0.0018053267277041178, -0.4698183815662918, 0.0592392817338059, 1.1149136973662743, -0.34735106321068765, 0.31789307638374403, 0.5347258540943893]
#> Final objective value: 1227.2943095482558
#> Return code: FTOL_REACHED
jmod$beta
#> [1] 1.966579310 -0.474365563 -0.381162665 -0.369798123 -0.175551952 0.001805327 -0.469818382
#> [8] 0.059239282
jmod$theta
#> [1] 1.1149137 -0.3473511 0.3178931 0.5347259 |
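# Compare the Julia fit to the bobyqa lme4 fit (mod5): the estimates and
# objective values should be close, though not numerically identical.
jmod$objective - (-2 * as.numeric(logLik(mod5)))
c(jmod$beta, jmod$theta) - c(mod5@beta, mod5@theta)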