-
-
Save chunsj/1bfafb96292ff7903af7a7414bd462b2 to your computer and use it in GitHub Desktop.
Bayesian Optimization
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(defpackage bo | |
(:use :common-lisp :alexandria :serapeum :trivia)) | |
(in-package :bo) | |
;;;; Bayesian Optimization based on Tree-structured Parzen Estimator and naive-Bayes. | |
;;; | |
;;; based on Rust code at https://docs.rs/tpe/0.1.1/tpe/ | |
;;; which is inspired by https://optuna.org/ | |
;;; which is inspired by http://hyperopt.github.io/hyperopt/ | |
;;; which is described in https://proceedings.neurips.cc/paper/2011/file/86e8f7ab32cfd12577bc2619bc635690-Paper.pdf | |
(define-symbol-macro ⊥ (error "Illegal evauation")) | |
(defmacro λ (args &body body) `(lambda ,args ,@body)) | |
(defvar *debug* nil | |
"Enable debug printouts.") | |
(defmacro dbg (fmt &rest args) | |
`(when *debug* | |
(format t ,fmt ,@args))) | |
;;;; Probability density estimators | |
(defun make-estimator (type samples) | |
(destructuring-case type | |
((double-float min max) (kde min max samples)) | |
((member &rest domain) (histogram domain samples)))) | |
(defgeneric sample (estimator) | |
(:documentation | |
"Sample a random value from the probability distribution of ESTIMATOR.")) | |
(defgeneric density (estimator x) | |
(:documentation | |
"Return the probability density of the distribution ESTIMATOR at X.")) | |
;;;;; Histogram estimator | |
(defstruct (histogram (:constructor make-histogram (domain weights total))) | |
(domain ⊥ :type sequence) | |
(weights ⊥ :type (vector fixnum)) | |
(total ⊥ :type fixnum)) | |
(defun histogram (domain samples) | |
;; Seed with one sample per bucket as a uniform prior. | |
(loop with weights = (make-array (length domain) :element-type 'fixnum :initial-element 1) | |
for s in samples | |
do (incf (aref weights (or (position s domain) | |
(error "Sample ~s outside domain ~s" s domain)))) | |
finally (return (make-histogram domain weights (sum weights))))) | |
(defmethod density ((h histogram) category) | |
"Return the probability density point estimate for CATEGORY in HISTOGRAM." | |
(with-slots (domain weights total) h | |
(/ (aref weights (or (position category domain) | |
(error "Category ~s outside domain ~s" category domain))) | |
(coerce total 'f64)))) | |
(defmethod sample ((h histogram)) | |
(with-slots (domain weights total) h | |
(elt domain (count-if (fuel (random total)) weights)))) | |
;;;; Double float math | |
(defparameter *standard-operators* | |
'(+ - * / 1+ 1- exp expt log sqrt abs | |
sin cos tan cis asin acos atan | |
sinh cosh tanh asinh acosh atanh)) | |
(defmacro defwrappers (type fmt &optional (operators *standard-operators*)) | |
"Define TYPE-specific macros for OPERATORS named with PREFIX. | |
The prefixed operators coerce all values (operands and results) to TYPE." | |
(labels ((wrapper (op) | |
`(defwrapper ,(ensure-symbol (format nil fmt op)) ,op ,type))) | |
`(progn ,@(mapcar #'wrapper operators)))) | |
(defmacro defwrapper (typed-op base-op type) | |
"Expand (TYPED-OP ...) => (BASE-OP ...) with all values coerced to TYPE." | |
(with-unique-names (args) | |
`(defmacro ,typed-op (&rest ,args) | |
(wrap (cons ',base-op ,args) ',type)))) | |
(defun wrap (exp type) | |
"Rewrite EXP with result and each argument coerced to TYPE." | |
(flet ((coerced (e) | |
`(coerce ,e ',type))) | |
(coerced (cons (first exp) | |
(mapcar #'coerced (rest exp)))))) | |
;;;;; Parzen kernel density estimator | |
;;; | |
;;; Variable kernel density estimator in which each sample becomes the | |
;;; mean of a Normal kernel with standard deviation spanning to the | |
;;; farthest neighbor. | |
(deftype f64 () 'double-float) | |
(defconstructor normal (μ f64) (σ f64)) | |
(defstruct kde | |
(min ⊥ :type f64) | |
(max ⊥ :type f64) | |
(kernels ⊥ :type (vector normal))) | |
(defun kde (min max samples) | |
(let* ((prior (/ (- max min) 2)) | |
(means (cons prior samples)) | |
(n (length means))) | |
(labels ((μ (i) | |
;; Mean at index I with underflow/overflow mapped to min/max | |
(cond ((= i -1) min) | |
((= i n) max) | |
(t (elt means i)))) | |
(σ (i) | |
;; Calculate stddev as distance to furthest neighbor | |
(max double-float-epsilon | |
(- (μ i) (μ (1- i))) | |
(- (μ (1+ i)) (μ i))))) | |
(make-kde :kernels (map '(vector normal) | |
(λ (i) (normal (μ i) (σ i))) | |
(iota n)) | |
:min min :max max)))) | |
(defmethod sample ((kde kde)) | |
"Sample one random value from kernel density estimator KDE." | |
(with-slots (min max kernels) kde | |
(ematch (random-elt kernels) | |
((normal μ σ) (clamp (rnorm μ σ) min max))))) | |
(defmethod density ((kde kde) x) | |
"Return the probability density point estimate for X in KDE." | |
(with-slots (min max kernels) kde | |
(if (or (< x min) (> x max)) | |
0 | |
(loop for k across kernels | |
summing (normal-density k x) into sum | |
finally (return (/ sum (length kernels))))))) | |
;; Define math operators (+. -. *. /. sqrt. ...) with automatic coercion to double-float. | |
(defwrappers double-float "~s.") | |
(defun normal-density (normal x) | |
"Return the probability density for Normal(μ,σ) at point X." | |
;; Standard equation e.g. https://en.wikipedia.org/wiki/Normal_distribution | |
(with-slots (μ σ) normal | |
(/. (exp. (*. -1/2 (expt. (/. (-. x μ) σ) 2))) | |
(sqrt. (*. 2 pi)) | |
σ))) | |
(defun rnorm (μ σ) | |
"Sample one random number from Normal(μ,σ)." | |
(loop for u1 = (random 1.0) | |
for u2 = (random 1.0) | |
when (> u1 single-float-epsilon) | |
do (return (+. μ (*. (*. σ (sqrt. (*. -2 (log. u1)))) | |
(cos. (*. pi 2 u2))))))) | |
;;;; User interface | |
(defmacro defhyperparameters (name &body parameters) | |
(let ((docstring (if (stringp (first parameters)) (pop parameters) nil))) | |
`(defparameter ,name | |
(make-instance (defclass ,name (hyperparameters) | |
,(loop for (name type . doc) in parameters | |
collect `(,name :type ,type :accessor ,name | |
,@(when doc (cons :documentation doc)))))) | |
,@(ensure-list docstring)))) | |
(defparameter *minimum-random-samples* 20 | |
"Minimum number of random samples before optimization starts.") | |
(defparameter *gamma* 0.2 | |
"Fraction of samples for the 'good' pile.") | |
(defparameter *candidates* 5 | |
"Number of alternatives to consider for each parameter's new value.") | |
(defclass hyperparameters () | |
((%samples :initform nil :type list))) | |
(defun best! (hyperparameters) | |
"Assign the best known values to HYPERPARAMETERS. | |
This is the exact parameter set that received the highest score." | |
(set! hyperparameters | |
(second (extremum (slot-value hyperparameters '%samples) | |
#'> :key #'first)))) | |
(defun next! (hyperparameters) | |
"Assign experimental new values to HYPERPARAMETERS. | |
This proposes the next parameters to evaluate. See SCORE." | |
(if (length< (slot-value hyperparameters '%samples) *minimum-random-samples*) | |
(loop for slot in (sb-mop:class-direct-slots (class-of hyperparameters)) | |
do (setf (slot-value hyperparameters (sb-mop:slot-definition-name slot)) | |
(random-value-from (sb-mop:slot-definition-type slot)))) | |
(design-next-experiment hyperparameters))) | |
;;;; Machinery for running experiments | |
(defun set! (hyperparameters param-values) | |
"Set the slots of HYPERPARAMETERS to alist PARAM-VALUES." | |
(loop for (param . value) in param-values | |
do (setf (slot-value hyperparameters param) value))) | |
(defun design-next-experiment (hyperparameters) | |
(with-slots (%samples) hyperparameters | |
(mvlet ((superior inferior (halves (sort %samples #'> :key #'first) | |
(ceiling (* (length %samples) *gamma*))))) | |
(set! hyperparameters | |
(loop for param in (parameter-names hyperparameters) | |
for type in (parameter-types hyperparameters) | |
collect (cons param | |
(optimized type (extract param superior) (extract param inferior)))))))) | |
(defun extract (param samples) | |
"Extract all values of PARAM from SAMPLES." | |
(loop for s in samples | |
collect (assoc-value (second s) param))) | |
(defun optimized (type superior inferior) | |
"Return a value of TYPE that is more like SUPERIOR than INFERIOR samples." | |
(let ((good (make-estimator type superior)) | |
(poor (make-estimator type inferior))) | |
(flet ((likelihood-ratio (x) | |
;; https://en.wikipedia.org/wiki/Likelihood-ratio_test | |
;; ... but note that Rust tpe code takes difference of log likelihood | |
(/ (density good x) (density poor x)))) | |
(extremum (loop repeat *candidates* collect (sample good)) | |
#'> :key #'likelihood-ratio)))) | |
(defun random-value-from (type) | |
(destructuring-case type | |
((double-float min max) (+ min (random (- max min)))) | |
((member &rest objects) (random-elt objects)))) | |
(defun score! (hyperparameters score) | |
"Assign SCORE to the current values of HYPERPARAMETERS. | |
Higher scores are better and guide future experimental choices. | |
This function should be called after CHOOSE-EXPERIMENTAL." | |
(push (list score | |
(loop for slot in (sb-mop:class-direct-slots (class-of hyperparameters)) | |
for name = (sb-mop:slot-definition-name slot) | |
collect (cons name (slot-value hyperparameters name)))) | |
(slot-value hyperparameters '%samples)) | |
score) | |
(defun parameter-names (hyperparameters) | |
"Return the list of parameter names (slot names) for HYPERPARAMETERS." | |
(mapcar #'sb-mop:slot-definition-name (sb-mop:class-direct-slots (class-of hyperparameters)))) | |
(defun parameter-types (hyperparameters) | |
"Return the list of parameter names (slot names) for HYPERPARAMETERS." | |
(mapcar #'sb-mop:slot-definition-type (sb-mop:class-direct-slots (class-of hyperparameters)))) | |
;;;; Test case | |
;;; | |
;;; Select restricted operands and coefficients to estimate a target value (e.g. PI.) | |
(defhyperparameters *hyper-test* | |
"Hyperparameters for test function: A <op1> (B <op2> (<op3> C))." | |
(a (double-float 1d0 100d0)) | |
(b (double-float 0.001d0 1000d0)) | |
(c (double-float 5.0d0 5.5d0)) | |
(op1 (member + - * /)) | |
(op2 (member + - * /)) | |
(op3 (member exp log))) | |
(defun hyper-test (&key (goal pi) (n 100)) | |
(setf *hyper-test* (make-instance '*hyper-test*)) | |
(with-slots (a b c op1 op2 op3) *hyper-test* | |
(dotimes (i n) | |
(next! *hyper-test*) | |
(score! *hyper-test* (hyper-test-score goal))) | |
(best! *hyper-test*) | |
(hyper-test-score goal))) | |
(defun hyper-test-score (goal) | |
(with-slots (a b c op1 op2 op3) *hyper-test* | |
(let* ((result (funcall op1 a (funcall op2 b (funcall op3 c)))) | |
(diff (abs (- result goal)))) | |
(- diff)))) | |
(defun report (hyperparameters) | |
(let* ((samples (slot-value hyperparameters '%samples)) | |
(ranked (sort samples #'> :key #'first))) | |
(format t "~&Score: top ~,3f (mean ~,3f over ~d scores)~%" | |
(first (first ranked) ) | |
(mean (mapcar #'first ranked)) | |
(length ranked)) | |
(loop for (param . value) in (second (first ranked)) | |
do (format t "~&~a = ~a" param value)) | |
(terpri))) | |
(defun hyper-test-expression () | |
"Return the function-approximation expression chosen by hyper-optimization." | |
(with-slots (a b c op1 op2 op3) *hyper-test* | |
`(,op1 ,a (,op2 ,b (,op3 ,c))))) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment