# pylint: disable = reimported
# ruff: noqa: F811
# ------------------------ OTHER ONES I HAVE INSTALLED ----------------------- #
from pytorch_optimizer import ADOPT, AdaBelief #, ...
from OPTAMI import GradientDescent, SimilarTriangles, CubicRegularizedNewton, BasicTensorMethod, DampedNewton, NesterovAcceleration, NearOptimalAcceleration, ProxPointSegmentSearch, NATA, Optimal
from heavyball import ForeachSFAdamW, PaLMForeachSFAdamW, ForeachADOPT, ForeachMuon, ForeachLaProp, MuonLaProp, ForeachSOAP, PaLMForeachSOAP, PrecondScheduleForeachSOAP, PrecondSchedulePaLMForeachSOAP, ForeachPSGDKron, ForeachPurePSGD, ForeachCachedDelayedPSGDKron, ForeachCachedPSGDKron, ForeachDelayedPSGD
from schedulefree import SGDScheduleFreeClosure, AdamWScheduleFreeClosure, RAdamScheduleFreeClosure, ScheduleFreeWrapper
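# A minimal usage sketch for the *Closure variants above, assuming they follow the
# standard torch.optim step(closure) interface (constructor args are my guess; the
# wrapper/non-closure schedule-free optimizers additionally expose .train()/.eval()):
def _schedulefree_closure_example(model, loss_fn, data, target):
    opt = AdamWScheduleFreeClosure(model.parameters(), lr=1e-3)
    def closure():
        opt.zero_grad()
        loss = loss_fn(model(data), target)
        loss.backward()
        return loss
    return opt.step(closure)  # step() invokes the closure itself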
from timm.optim import AdaBelief, Adafactor #, ...
# ----------------------------------- MINE ----------------------------------- #
from torchzero.optim import Adagrad, AdamW #, ...
# ----------------------------------- misc ----------------------------------- #
from .Optimizer_PyTorch import AdaBound, AdaBoundW, Adam, ErrorFeedbackSGD, ExtraAdam, ExtraSGD, OptimisticAdam, OMD, SGD, Storm
from .PersonalCodeRepository import SVRG, ErrorFeedbackSGD
from .sota_data_augmentation_and_optimizers import RAdam, DeepMemory, Lookahead
from .Awesome_Optimizers import * # insane number of them
from .moai import * # insane number of them
from .collie import AdaLomo, Adan, Lion, Lomo, SophiaG
from .pyutils import Adam_GC, DAdaptAdam, DAdaptSGD, GLD, Lookahead, Prodigy, RAdam, SAM, SGD_GC, SMTP
from .Best_Deep_Learning_Optimizers import madgrad_wd, Ranger, Sls, Adahessian, AdaMod, DeepMemory, DiffGrad, diffRGrad, DiffMod
from .over9000 import AdaBelief, AdaMod, Adan, Apollo, DiffGrad, Lamb, Lookahead, Madam, MADGRAD, AdamW, RAdam, PlainRAdam, Novograd, Ralamb, Ranger, RangerLars
from .cringe_live import AdaAbs, AdaptiveCompass, Clybius, Compass, DOPE, ExMachina, FARMSCropV3, FCompass, SAVEUS
from .Personalized_Optimizers import FARMSCrop, FARMSCropV2, FCompass, FishMonger, FMARSCrop, FMARSCrop_ExMachina, FMARSCropV2
# ----------------------------------- repos ---------------------------------- #
from .kron_torch import Kron
# Kronecker-factored preconditioner
from .MEKF_MAME import MEKF, MEKF_MA, Lookahead
# Modified Extended Kalman Filter with generalized exponential Moving Average
from .NGD_SGD import NGD
# Natural gradient descent
from .psgd_torch import LRA, Affine, Kron, Newton, XMat
# Preconditioned gradient descent
from .psiDAG import UniversalSGD
# Universal Stochastic Gradient Method
from .RiemannianSGD import HyperboloidRSGD, PoincareRSGD
# Non-Euclidean space GD
from .StochasticMirrorDescent import SMD_compress, SMD_qnorm
# Stochastic Mirror Descent
from .SUG.SUG import SUG
# Adaptive stochastic gradient method based on the universal gradient method
from .VTTCG import VTTCG, AdaBelief
# Variable three-term conjugate gradient method
from .FAdam import FAdam, AnyPrecisionAdamW
# Fisher Adam
from .dfw import DFW
# Deep Frank Wolfe
from .coolmomentum import Coolmom, Coolmomentum, Coollin
# CoolMomentum: a method for stochastic optimization by Langevin dynamics with simulated annealing
from .bgd import BGD
# Bayesian Gradient Descent
from .torchimize import GNA
# Gauss-Newton algorithm
from .autosgm import AutoSGM
# AutoSGM: A Unifying Framework for Accelerated Learning
from .torch_kfac import KFACOptimizer, EKFACOptimizer
from .KFAC import KFAC, EKFAC, GKFAC
from .torch_kfac2 import KFAC # MAYBE GOOD
# Kronecker-Factored Approximate Curvature
from .SGDPH.sgdph import sgdph
# SGD with Partial Hessian
from .LaplacianSmoothing_GradientDescent import LS_SGD # doesn't work
from .LS_MCMC import LSpSGLD, LSSGLD, pSGLD, SGLD # LSpSGLD, LSSGLD require "vecs" whatever that is
from .DP_LSSGD import LSSGD, LSSGDTorch # doesn't work
from .dlt import LSSGD, LSSGDTorch # doesn't work
# Laplacian Smoothing Gradient Descent
from .adashift import AdaShift
# AdaShift: Decorrelation and Convergence of Adaptive Learning Rate Methods
from .soap import SOAP
# Shampoo with Adam in the Preconditioner's eigenbasis (SOAP).
from .PAL import PalOptimizer
# PAL - Parabolic Approximation Line Search for DNNs
from .LABPAL import GOLSI, LabPal, PalOptimizer, PLS, Sls, SLS
# The Large-Batch Parabolic Approximation Line Search (LABPAL)
from .lion import Lion, LionForEach # LionForEach is not in __init__ by default so idk if it is tested
# EvoLved Sign Momentum (Symbolic Discovery of Optimization Algorithms)
from .adam_atan2 import AdamAtan2, AdamAtan2ForEach, AdamAtan2WithWassersteinReg # Only AdamAtan2 in __init__
# Adam with atan2 instead of epsilon (Scaling Exponents Across Parameterizations and Optimizers)
from .grokfast import GrokFastAdamW
# Grokfast, Accelerated Grokking by Amplifying Slow Gradients
from .lbfgs import LBFGSNew, LBFGSB
# Improved LBFGS and LBFGS-B optimizers.
from .AdEMAMix import AdEMAMix, AdEMAMixDistributedShampoo
# The AdEMAMix Optimizer: Better, Faster, Older (mixture of two EMAs)
from .parameterfree import COCOB, KT, cKT
# Parameter-Free Optimizers
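# Parameter-free optimizers do not take a learning rate; a minimal sketch assuming
# the standard torch.optim.Optimizer interface with default constructor arguments:
def _cocob_example(model, loss_fn, data, target):
    opt = COCOB(model.parameters())
    opt.zero_grad()
    loss = loss_fn(model(data), target)
    loss.backward()
    opt.step()
    return loss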
from .SimulatedAnnealing import SimulatedAnnealing
# Simulated Annealing
from .Positive_Negative_Momentum import PNM, AdaPNM
# Positive-Negative Momentum: Manipulating Stochastic Gradient Noise to Improve Generalization
from .AngularGrad import AdaBelief, diffgrad, cosangulargrad, tanangulargrad
# AngularGrad: A New Optimization Technique for Angular Convergence of Convolutional Neural Networks
from .PIDOptimizer import PIDOptimizer
# A PID Controller Approach for Stochastic Optimization of Deep Networks
from .esgd import ESGD
# Stochastic non-convex second order optimizer
from .pytorch_soo import * # a lot of them
# Second Order Optimizers for Machine Learning
from .curveball import CurveBall, CurveBallInterleave
# Small Steps and Giant Leaps: Minimal Newton Solvers for Deep Learning
from .torch_second_order import GradientDescent, LevenbergMarquardt
# Levenberg–Marquardt algorithm
from .grnewt import NewtonSummary, NewtonSummaryVanilla, NewtonSummaryFB, NewtonSummaryUniformAvg
# Adapting Newton's Method to Neural Networks through a Summary of Higher-Order Derivatives
from .pytorch_storm import STORM
# stochastic first order trust region method
from .pytorch_trish import TRish
# A Stochastic Trust Region Algorithm Based on Careful Step Normalization
from .fate_llm import ZerothOrderOptimizer, KSeedZerothOrderOptimizer
# "This optimizer performs a `random` walk update for the parameters of the model."
from .FederatedScope_FedKSeed import MeZOBiasOptimizer
from .fusion_bench import MeZO
# MEZO
from .NewtonCG import NewtonCG
# Newton-CG algorithm with backtracking line-search
from .dreamplace import NesterovAcceleratedGradientOptimizer
# Nesterov's implementation of the e-place algorithm (???) (THIS IS NOT NESTEROV MOMENTUM, IT'S NESTEROV SOMETHING ELSE)
from .sls_ffa import Sls, SlsAcc, SlsEg, SVRG, AdaBound, CocobBackprop, CocobOns, PLS
# Stochastic line search (fork with more stuff)
from .sps import Sps
# Stochastic Polyak Step-size
from .ada_sls import AdaSLS
# Adaptive Gradient Methods Converge Faster with Over-Parameterization
from .sls import Sls, SlsAcc, SlsEg
# Stochastic line search
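# Line-search optimizers like Sls re-evaluate the objective at trial step sizes, so
# step() takes a closure. A minimal sketch, assuming the closure returns the loss
# without calling backward() (check each repo's README, conventions differ):
def _sls_example(model, loss_fn, data, target):
    opt = Sls(model.parameters())
    def closure():
        return loss_fn(model(data), target)
    opt.zero_grad()
    loss = closure()
    loss.backward()
    opt.step(closure)
    return loss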
from .chop import PGD, PGDMadry, S3CM, PairwiseFrankWolfe, FrankWolfe
# constrained optimization for PyTorch
from .ncg_optimizer import LCG, BASIC
from .ncg_optimizer_ApostolosGreece import LCG, BASIC # fork, seems to have some kinds of changes
# nonlinear conjugate gradient
from .LPF_SGD import EntropySGD2, EntropyAdam, EntropySGD, SAM
# Low-Pass Filtering SGD for Recovering Flat Optima (but I don't think it has LPFSGD optimizer, unless EntropySGD is one)
from .optimizer import SAM, NelderMead, PatternSearch
# bro made a nelder mead (Loss Landscapes are All You Need: Neural Network Generalization Can Be Explained Without the Implicit Bias of Gradient Descent)
from .convis import FiniteDifferenceGradientOptimizer
# apparently second order fd
from .fullbatch import AdaptiveGradientClipping, FISTA, FISTALineSearch, SGDLineSearch, LARS, LBFGS, SAM, SGD_AGC, RestartingLineSearch, NonMonotoneLinesearch, WolfeGradientDescent
# Training vision models with full-batch gradient descent and regularization
from .peps_torch_feat_czx import SGD_MOD, LBFGS_MOD
# SGD with backtracking line search
from .Target_Based_Surrogates_For_Stochastic_Optimization import Ada_FMDOpt, Adam_FMDOpt, Diag_Ada_FMDOpt, GULF2, LSOpt, MD_FMDOpt, Online_Newton_FMDOpt, Sadagrad, SGD_FMDOpt, SLS_FMDOpt, SVRG
# Target Based Surrogates For Stochastic Optimization (some crazy stuff)
from .SDLS import SDLS
# Armijo Back-tracking line search on Training DNN
from .hessianfree import HessianFree
# Deep learning via Hessian-free optimization (need to install backpack)
from .salsa.SaLSA import SaLSA
# SALSA - Stable Armijo Line Search Adaptation
from .nitorch import OGM, BacktrackingLineSearch
# optimizers from neuroimaging library
from .qori_aziz_sa import SimulatedAnnealing
# SA from someones homework
from .neural_net_optimizers import GeneticAlgorithm, ParticleSwarm
# dfo
from .NNAIF import CMAES, EMNA, IMFIL, NNAIF, SGPGD, RESNETEULER
# Neural Network Accelerated Implicit Filtering: Integrating Neural Network Surrogates With Provably Convergent Derivative Free Optimization Methods
from .befree import CurveBall, HessianFree, Newton, SimplifiedHessian
# On the New method of Hessian-free second-order optimization
from .bayesian_snn import BayesBiSNN, GaussianBayesOptimizer
# Bayesian Continual Learning via Spiking Neural Networks (I think it needs layers from that lib too)
from .ML_APTS import APTS, LocalTR, TR, TRAdam
# Additively preconditioned trust-region strategies for machine learning
from .torchmin import Minimizer, ScipyMinimizer
from .pytorch_minimize import MinimizeWrapper, BasinHoppingWrapper, DualAnnealingWrapper, DifferentialEvolutionWrapper
# scipy minimize (ha ha mine is better)
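# Both wrappers drive a scipy.optimize-style minimizer from inside step(closure), so
# they are meant for full-batch, deterministic objectives. A sketch with assumed
# constructor arguments (method names follow scipy.optimize.minimize; verify per repo):
def _scipy_minimize_example(model, loss_fn, data, target):
    opt = MinimizeWrapper(model.parameters(),
                          minimizer_args=dict(method="L-BFGS-B", options={"maxiter": 20}))
    def closure():
        opt.zero_grad()
        loss = loss_fn(model(data), target)
        loss.backward()
        return loss
    return opt.step(closure)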
from .geoopt import SGRHMC, RHMC, RSGLD, RiemannianAdam, RiemannianLineSearch, RiemannianSGD, SparseRiemannianAdam, SparseRiemannianSGD
# Riemannian Adaptive Optimization Methods (maybe only works on geoopt layers idk)
from .pykg2vec import RiemannianOptimizer
# from "Python library for knowledge graph embedding" (but I changed it to affect all layers)
from .M_FAC import MFAC
# M-FAC: Efficient Matrix-Free Approximations of Second-Order Information
from .ddpnopt import Step, RmsDDP, AdamDDP
# DDPNOpt: Differential Dynamic Programming Neural Optimizer
from .singd import SINGD
# KFAC-like Structured Inverse-Free Natural Gradient Descent
from .sirfshampoo import SIRFShampoo
# SIRFShampoo: Structured inverse- and root-free Shampoo in PyTorch
from .StructuredNGD_DL import KFACOptimizer, LocalOptimizer
# Matrix-multiplication-only KFAC (Simplifying Momentum-based Positive-definite Submanifold Optimization)
from .Muon import Muon
# MomentUm Orthogonalized by Newton-Schulz.
from .orth_optim import orthogonalise
# Orthogonalising gradients to speed up neural network optimisation. `orthogonalise(AdamW)(model.parameters(), lr = 1e-3)`
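# The wrapping pattern from the comment above, spelled out: orthogonalise takes an
# optimizer *class* and returns a class whose updates are orthogonalised (minimal
# sketch; torch.optim.AdamW stands in for any base optimizer):
def _orth_optim_example(model):
    import torch
    OrthAdamW = orthogonalise(torch.optim.AdamW)
    return OrthAdamW(model.parameters(), lr=1e-3)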
from .torch_pso import ParticleSwarmOptimizer, GenerationalPSO, AutotuningPSO, RingTopologyPSO, ChaoticPSO, GenericPSO, AcceleratedPSO, SineCosineAlgorithm, ImprovedSineCosineAlgorithm
# Particle Swarm Optimization
from .langevin_sampling import SGLD, pSGLD
# Sampling with gradient-based Markov Chain Monte Carlo approaches
from .adopt import ADOPT
# Modified Adam Can Converge with Any β2 with the Optimal Rate
from .fsdp_optimizers import SOAP, Kron, Muon, KronMars
# optimizers with FSDP support
from .NGPlus import NGPlus, o_NGPlus, o_NGPlus_Block, create_oNG_optimizer
# NG+: A new second-order optimizer for deep learning
from .MARS_AdamW import MarsAdamW
# MARS: Unleashing the Power of Variance Reduction for Training Large Models
from .MSAM import AdamW, AdamW_MSAM, AdamW_SAM, ESAM, LookSAM, MSAM, SAM, SGD
# Momentum-SAM: Sharpness Aware Minimization without Computational Overhead
from .adasub import SubHes
# Stochastic Optimization Using Second-Order Information in Low-Dimensional Subspaces
from .MomSPS import MomSPS, MomSPS_smooth
# Stochastic Polyak Step-sizes and Momentum
from .momo import Momo, MomoAdam
# Momentum Models for Adaptive Learning Rates
from .DIMAT import CDMSGD, CDSGD, CGA, DSMA, LDSGD, SGP, SwarmSGD
# Decentralized Iterative Merging-And-Training for Deep Learning Models
from .Noise_stability_optimization import BSAM, NSM, SAM, RSAM
# noise stability optimization algorithm, Hessian-based regularization approach for finding flat minima (NSM)
from .Exponentiated_Gradient import EGPM
# exponentiated gradient (EG) algorithm and plus-minus variant
from .zeroptim import MeZO, SmartES
# zero-order optimization techniques
from .GDPolyak import GDPolyak
# Gradient descent with adaptive stepsize converges (nearly) linearly under fourth-order growth
from .APROX import Truncated, TruncatedAdagrad
# APROX: Robust Stochastic Optimization Algorithms
from .SVRG_Pytorch import SVRG
# efficient variant of SVRG that relies on mini-batching implemented in Pytorch
from .poincare_embeddings import RiemannianSGD
# actually working riemannian SGD
from .tram_optimizer import TRAM
# Trust Region Aware Minimization
from .gsam import GSAM
# Surrogate Gap Guided Sharpness-Aware Minimization
from .ReinventingWheel import FTRLP
# FTRL-proximal algorithm (Follow-the-Regularized-Leader and Mirror Descent: Equivalence Theorems and L1 Regularization, H. B. Mcmahan. AISTATS 2011.)
from .OLoptim import FTML, FTRL_Proximal, OSD, SGDOL_global, SGD_globLR, STORM
# Online & Stochastic optimization algorithms for deep learning
from .metaopt import SGD_Multi_LR, SGD_Quotient_LR
# Online hyperparameter optimization by real-time recurrent learning
from .core_optimizer import CoRe
# Continual Resilient (CoRe) Optimizer
from .Seminar import Ada_Grad, FTRL, nAda_Grad, nFTRL, nKT, nOGD, OGD
# "Implementation of different algorithms and their normalized counterparts in the pytorch framework"
from .Recommendation_System_Method_Reproduce import FTRL
from .Code import FTRL, OBC
from .ftrl import FTRL
from .DP_FTRL import FTRLOptimizer # official implementation by Google
# Follow-the-Regularized-Leader
from .smart_grid import AdaX
# AdaX: Adaptive Gradient Descent with Exponential Long Term Memory
from .nerf_atlas import UniformAdam
# something crazy with solves and a Laplacian matrix??
from .mlopt import Adahessian, Goldstein, Normalized_Optimizer, OneBit_Adam, SAM, Alternate_SAM, Alternate_SAM_v2, Alternate_SAM_v3, AdamS_v1, ASAM_ON, Sketch_Adam, SophiaG, Sophus, GN_DOM_SGD, GN_BULK_SGD, DOM_SGD, BULK_SGD
# crazy stuff (no descriptions)
from .subgd import PCAOptimizer
# Few-Shot Learning by Dimensionality Reduction in Gradient Space (needs some kind of config)
from .RFR_NeurIPS23 import RFR
# robust fairness regularization (RFR) - Chasing Fairness under Distribution Shift: a Model Weight Perturbation Approach
from .A_Deep_Learning_Optimizer_Based_on_Grunwald_Letnikov_Fractional_Order_Definition import FCSGD_G_L, FCAdam_G_L
# A Deep Learning Optimizer Based on Grunwald Letnikov Fractional Order Definition
from .VFOGD_PF_and_Its_Application_in_Deep_Learning import VFOSGD_PF, VFOAdam_PF
# VFOGD_PF and Its Application in Deep Learning
from .staleness_corrected_momentum import SCMSGD, SCMTDProp, OptChain, FixedSGD
# Correcting Momentum in Temporal Difference Learning
from .DPSGD import DPSGD
# PyTorch implementation of tf.privacy.DPGradientDescentGaussianOptimizer
from .DPtorch import JDPSGD
# Improving Deep Learning with Differential Privacy using Gradient Encoding and Denoising
from .optimizer2 import AdaBound, AdaGC, AdaMod, Adan, Yogi
# Adaptive Optimization Algorithm with Gradient Bias Correction (AdaGC)
from .ProxSPS import SPS, ProxAdam
# Polyak step sizes with weight decay in Pytorch
from .bb_dl import BB
# Barzilai-Borwein-based Adaptive Learning Rate for Deep Learning
from .Adaptive_learning_rate_optimization_algorithm_with_dynamic_bound_based_on_Barzilai_Borwein_method import BBbound, AdaBBbound
# Adaptive learning rate optimization algorithm with dynamic bound based on Barzilai-Borwein method
from .mL_BFGS import SlimQN, BlockSlimQN, KFACOptimizer, LBFGSOptimizer, SGDOptimizer
# A Momentum-based L-BFGS for Distributed Large-Scale Neural Network Optimization
from .Noisy_SGD import GGDO1, GGDO2, GGDO3, GGDO4, GGDO5, pSGLD, SGLD
# Adaptively Preconditioned Stochastic Gradient Langevin Dynamics
from .adamaio import AdamAIO
# All-In-One Adam
from .adams import Adams, AdamUCB, AdamCB
# Exploiting Uncertainty of Loss Landscape for Stochastic Optimization
from .AdaTS import AdaTS, AdaITS, AdamTS, YOGITS
# ???? can't find anything about "AdaTS algorithm"
from .MNSAM import MNSAM, SAM, SUM, Adan
# Sharpness-Aware Minimization Method with Momentum Acceleration for Deep Neural Networks
from .innaprop import INNAprop
# INNAprop, a second-order optimization algorithm for deep learning
from .M3Learning import AdaHessian, TRCG
# Trust-Region Conjugate Gradient
from .opt_for_pinns import Adam_LBFGS, Adam_LBFGS_GD, Adam_LBFGS_NNCG, ALRMAG, GD, NysNewtonCG, PolyakGD, PolyakLBFGS, SketchyGN, SketchySGD
# bunch of stuff from "Challenges in Training PINNs: A Loss Landscape Perspective"
from .deepxde import NNCG
# NysNewtonCG, a damped Newton-CG method that uses Nyström preconditioning
from .alternating_layers import DampedNewton
# seemingly a good DampedNewton (they apply it to the final layer, the rest is optimized by a 1st-order method)
from .nanoGPTSLS import AdamSLS, KenSLS
# training nanoGPT with SLS
from .Skoltech3D import BatchBFGS, BatchGD
# batch BFGS? what's that (these don't inherit Optimizer but have the same signature)
from .ICNN_verification import SdLBFGS, SdLBFGS0
# Stochastic quasi-Newton methods for nonconvex stochastic optimization
from .ZO_AdaMM_vs_FO_AdaMM import AdaMM
# ON THE CONVERGENCE OF ADAM AND BEYOND
from .AdaSTE import BayesBiNN, FenBPOpt, FenBPOptQuad, FenBPOptProx, MDTanhOpt
# AdaSTE: An adaptive Straight-Through Estimator to Train Binary Neural Networks, Training Binary Neural Networks using the Bayesian Learning Rule
from .alopex import Alopex
# ALgorithm Of Pattern EXtraction (ALOPEX) 99/B version (gradient free)
from .statopt import QHM, SALSA, SSLS, SASA, SLOPE
# ???idk
from .superpolyak import SuperPolyak, NewtonCG
# SuperPolyak subgradient method - first-order method for solving (possibly) nonsmooth equations/optimization problems
from .GaussNewtonPolyak import GNP
# A linearly convergent Gauss-Newton subgradient method for ill-conditioned problems
from .ntd import NTD, Polyak
# Normal-Tangent-Descent (A nearly linearly convergent first-order method for nonsmooth functions with quadratic growth)
from .nuqls import LaplaceGGN
# Uncertainty Quantification with the Empirical Tangent Kernel
from .SimuRLacra import GSS
# Golden Section Search (I think this is gradient free and for univariate funcs)
from .gcopt import GCOptimizer
# Gaussian continuation optimizer (wraps another optimizer, and ultra-recent)
from .k_fac import KFACOptimizer, KFACIDOptimizer, SKFACOptimizer, EKFACOptimizer, KBFGSOptimizer, KBFGSLOptimizer, KBFGSL2LOOPOptimizer, KBFGSLMEOptimizer, NGDOptimizer
# biggest k-fac repo (i fixed all acc_stats)
from .proxyprox import ProxyProx
# konstmish's mysterious ProxyProx (has step as well as inner_step methods)
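# Assumed usage for ProxyProx, inferred only from the method names mentioned above;
# the split of work between step() and inner_step() is undocumented, so treat this
# as a guess rather than the intended API:
def _proxyprox_example(model, loss_fn, batches, inner_iters=5):
    opt = ProxyProx(model.parameters())
    for data, target in batches:
        for _ in range(inner_iters):
            opt.zero_grad()
            loss_fn(model(data), target).backward()
            opt.inner_step()   # inner proximal iterations (assumed)
        opt.step()             # outer update (assumed)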
from .SWANOptimizer import SWAN
# SWAN (SGD with Whitening And Normalization)
from .sparse_szo import DuelingEvolutionOptimizer, VanillaEvolutionOptimizer, OneSideEvolutionOptimizer, TwoSideEvolutionOptimizer, FirstOrderOptimizer, FirstOrderBanditOptimizer
# Sparse Perturbations for Improved Convergence in Stochastic Zeroth-Order Optimization
from .PSGD_Nuon import Nuon, AutoNuon
# Uses single-sided whitening that is dynamic and learned instead of instantaneous like Muon
from .coherent_gradients import RA3, RM3, M3
# Weak and Strong Gradient Directions: Explaining Memorization, Generalization, and Hardness of Examples at Scale
from .eva import Eva, EvaExperimental, KFAC, AdaKFAC, AdaKFAC2, KFACSAM, MFAC, Shampoo
# Eva: Practical Second-order Optimization with Kronecker-vectorized Approximation (pretty sure they modify the gradient and don't update params)
from .natural_galore import SubSpaceAdamW
# GaLore extension - Natural Gradient Descent in low rank subspace
from .galore_torch import GaLoreAdafactor, GaLoreAdamW, GaLoreAdamW8bit
# Memory-Efficient LLM Training by Gradient Low-Rank Projection
from .compass_optimizer import CompassExperimental4Bit, CompassExperimental8Bit, CompassExperimental8BitBNB, Compasstic, LPFAdamW, AdamW, RMSProp
# A modification of the original AdamW optimizer that replaces the momentum moment with a smoothing filter.
from .sgd_sai import SGD_sai
# No More Adam: Learning Rate Scaling at Initialization is All You Need
from .unrl import EligibilityTraceOptimizer, KFACOptimizer
# optimizers from Reinforcement Learning algorithms library
from .second_order_optimization_NQS import SecondOrderOpt
# Second-order Optimisation strategies for neural network quantum states
from .ldadamw_torch import LDAdamW
# Low-Dimensional Adam - Adaptive Optimization from Low-Dimensional Gradient Statistics.
from .pydrsom import DRSOMB, DRSOMB2, DRSOMK, DRSOMVec, KDRSOM
# dimension-reduced second-order method (DRSOM)
from .AdaGL import AdaGL, FractionalSmoothLoss
# deep learning optimizer that combines fractional-order calculus with adaptive techniques. Using Grünwald–Letnikov derivatives
from .mkor import MKOR
# Momentum-Enabled Kronecker-Factor-Based Optimizer Using Rank-1 Updates
from .sn_sm import GenericOptim, AdamWSN, AdamWSNG
# GenericOptim is maybe it; Subset-Norm and Subspace-Momentum: Faster Memory-Efficient Adaptive Optimization with Convergence Guarantees
from .OptML_Project import Adasub, Adahessian
# Comparison of second-order optimizers on transformers
from .MARS import MARS, ADOPT, Muon, AdamW
# MARS (Make vAriance Reduction Shine)
from .pytorch_velo import VeLO
# learned optimizer LSTM (just a pytorch wrapper for jax optimizer)
from .mctorch import ConjugateGradient
# other optimizers from this are for manifolds only, this works on any layers
from .modded_nanogpt import Kron
# kron fork by evanatyourservice with recent changes
from .smplifyx import TrustRegionNewtonCG, LBFGS
# trust region newton cg
from .widedeepnetworks import ESS, HMC
# Gaussian Process Behaviour in Wide Deep Neural Networks (zeroth order?)
from .alf import NeroPlus, AdamTF
# keeps the norm of each parameter vector fixed and its mean at zero during the optimization process
from .SOAP_MUON import SOAP_Muon
# SOAP + Muon = SOAP_Muon
from .psgd_kron_lra import KronLRA
# LRA per kron factor
from .psgd_kron_contraction import Kron
# joint learning of Xilin Li's criterion 3 as well as Madeleine Udell's contraction factor on the Lie group
from .Moonlight import Muon
# Muon with lr normalization based on param size and maybe other stuff
from .FedPD import PSVRG, PSGD, FedPD_SGD, FedPD_VR
# Federated Primal-Dual Algorithm
from .llmart import GreedyCoordinateGradient
# greedy coordinate gradient
from .EOPC import Rosen
# Optimizing Mode Connectivity for Class Incremental Learning
from .Autoenv import IEKF
# iterative extended kalman filter optimizer
from .fastr import FastrD, FastrN, STORMplus, StormPlus
# Fully Adaptive STochastic Recursive-momentum
from .NeuralNetwork import SLBI, SLBI2, SLBI_ADAM_ToolBox, SLBI_SGD_ToolBox
from .DessiLBI import SLBI, SLBI_ToolBox
# Exploring Structural Sparsity of Deep Networks via Inverse Scale Spaces
from .dowg import DoWG, CDoWG
# DoWG Unleashed: An Efficient Universal Parameter-Free Gradient Descent Method
from .archai import CocobBackprop, CocobOns, Lamb
# microsofts NAS lib
from .coin_betting import SGDOL, Cocob, Recursive, Regralizer, Scinol2, ONSBet
# Parameter-free coin betting optimizers
from .dolphinflow import DolphinFlow
# recent muon/adamw like has a bunch of settings to tune https://github.com/cognitivecomputations/dolphinflow-optimizer
from .neosr import adamw_win, adan_sf, adamw_sf, adan, soap_sf, fsam
# from a super-resolution lib, plus stuff adapted from heavyball
from .recpre import SOAP, LionW, SophiaG, Lilith, ELLISAdam, IVON, ZeroShampooWithAdamGraftingOptimizer, OrthogonalNesterov
# recurrent pretraining
from .supertrainer2k import Adalite, Lilith
# idk
from .wu_nature_comms_2024 import NewStyleBatchFISTAOptim, NewStyleSingleFISTAOptim
# something insane
from .dd4ml import APTS, APTS_D, TrustRegion, TrustRegionLegacy
# Additively preconditioned trust-region strategies for machine learning; requires some type of config and some type of subdomain_optimizer
from .koaning_io_more_descent_less_gradient import KeepStepping, KeepVaulting
# keeps stepping on a single batch, or maybe it was supposed to be a line search, idk
from .CR import COMP
# Compact representations for recursive Hessian matrix estimates (similar to LBFGS)
from .MaxFactor import MaxFactor
# ultra recent
from .scion import Scion
# Training Deep Learning Models with Norm-Constrained LMOs.
from .rapp import RAPPsgd, RAPPadam, ExtraAdagrad, ExtraAdam, ExtraSGD, EGplusAdam, EGplusSGD, LA, AdamLA, ExtraSGDLA, ExtraAdamLA, EGplusLA, EGplusAdamLA
# Stable Nonconvex-Nonconcave Training via Linear Interpolation
from .storm_plus import STORMplus
# STORM+
from .AccSGD import AccSGD
# On the insufficiency of existing momentum schemes for Stochastic Optimization
from .AdaInject import AdaBelief, AdaBeliefInject, AdamInject, diffGrad, diffGradInject,Radam, RadamInject
# AdaInject: Injection Based Adaptive Gradient Descent Optimizers for Convolutional Neural Networks
from .PowerSign_and_AddSign import AddSign, PowerSign
# https://github.com/Neoanarika/Implementing-the-PowerSign-and-AddSign-rule
from .AddSign_PowerSign_in_PyTorch import AddSign, PowerSign, LinearInternalDecay, CosineInternalDecay, RestartCosineInternalDecay
# https://github.com/cydonia999/AddSign_PowerSign_in_PyTorch
# Neural Optimiser Search with Reinforcement Learning
from .neumann_optimizer import Neumann, Neumann2
# https://github.com/jayybhatt/neumann-optimizer
# A Practical Optimization Algorithm for Deep Neural Networks (implicitly computes the inverse Hessian of each mini-batch to produce descent directions)
from .neural_search_optimizer import Optimizer_1
# https://github.com/daviddao/pytorch-neural-search-optimizer
# Neural Optimizer Search's Optimizer_1