Skip to content

Instantly share code, notes, and snippets.

@chunsj
Forked from lukego/bo.lisp
Created September 28, 2021 09:48
Show Gist options
  • Save chunsj/1bfafb96292ff7903af7a7414bd462b2 to your computer and use it in GitHub Desktop.
Save chunsj/1bfafb96292ff7903af7a7414bd462b2 to your computer and use it in GitHub Desktop.
Bayesian Optimization
(defpackage bo
(:use :common-lisp :alexandria :serapeum :trivia))
(in-package :bo)
;;;; Bayesian Optimization based on Tree-structured Parzen Estimator and naive-Bayes.
;;;
;;; based on Rust code at https://docs.rs/tpe/0.1.1/tpe/
;;; which is inspired by https://optuna.org/
;;; which is inspired by http://hyperopt.github.io/hyperopt/
;;; which is described in https://proceedings.neurips.cc/paper/2011/file/86e8f7ab32cfd12577bc2619bc635690-Paper.pdf
(define-symbol-macro ⊥ (error "Illegal evauation"))
(defmacro λ (args &body body) `(lambda ,args ,@body))
(defvar *debug* nil
"Enable debug printouts.")
(defmacro dbg (fmt &rest args)
`(when *debug*
(format t ,fmt ,@args)))
;;;; Probability density estimators
(defun make-estimator (type samples)
(destructuring-case type
((double-float min max) (kde min max samples))
((member &rest domain) (histogram domain samples))))
(defgeneric sample (estimator)
(:documentation
"Sample a random value from the probability distribution of ESTIMATOR."))
(defgeneric density (estimator x)
(:documentation
"Return the probability density of the distribution ESTIMATOR at X."))
;;;;; Histogram estimator
(defstruct (histogram (:constructor make-histogram (domain weights total)))
(domain ⊥ :type sequence)
(weights ⊥ :type (vector fixnum))
(total ⊥ :type fixnum))
(defun histogram (domain samples)
;; Seed with one sample per bucket as a uniform prior.
(loop with weights = (make-array (length domain) :element-type 'fixnum :initial-element 1)
for s in samples
do (incf (aref weights (or (position s domain)
(error "Sample ~s outside domain ~s" s domain))))
finally (return (make-histogram domain weights (sum weights)))))
(defmethod density ((h histogram) category)
"Return the probability density point estimate for CATEGORY in HISTOGRAM."
(with-slots (domain weights total) h
(/ (aref weights (or (position category domain)
(error "Category ~s outside domain ~s" category domain)))
(coerce total 'f64))))
(defmethod sample ((h histogram))
(with-slots (domain weights total) h
(elt domain (count-if (fuel (random total)) weights))))
;;;; Double float math
(defparameter *standard-operators*
'(+ - * / 1+ 1- exp expt log sqrt abs
sin cos tan cis asin acos atan
sinh cosh tanh asinh acosh atanh))
(defmacro defwrappers (type fmt &optional (operators *standard-operators*))
"Define TYPE-specific macros for OPERATORS named with PREFIX.
The prefixed operators coerce all values (operands and results) to TYPE."
(labels ((wrapper (op)
`(defwrapper ,(ensure-symbol (format nil fmt op)) ,op ,type)))
`(progn ,@(mapcar #'wrapper operators))))
(defmacro defwrapper (typed-op base-op type)
"Expand (TYPED-OP ...) => (BASE-OP ...) with all values coerced to TYPE."
(with-unique-names (args)
`(defmacro ,typed-op (&rest ,args)
(wrap (cons ',base-op ,args) ',type))))
(defun wrap (exp type)
"Rewrite EXP with result and each argument coerced to TYPE."
(flet ((coerced (e)
`(coerce ,e ',type)))
(coerced (cons (first exp)
(mapcar #'coerced (rest exp))))))
;;;;; Parzen kernel density estimator
;;;
;;; Variable kernel density estimator in which each sample becomes the
;;; mean of a Normal kernel with standard deviation spanning to the
;;; farthest neighbor.
(deftype f64 () 'double-float)
(defconstructor normal (μ f64) (σ f64))
(defstruct kde
(min:type f64)
(max:type f64)
(kernels ⊥ :type (vector normal)))
(defun kde (min max samples)
(let* ((prior (/ (- max min) 2))
(means (cons prior samples))
(n (length means)))
(labels ((μ (i)
;; Mean at index I with underflow/overflow mapped to min/max
(cond ((= i -1) min)
((= i n) max)
(t (elt means i))))
(σ (i)
;; Calculate stddev as distance to furthest neighbor
(max double-float-epsilon
(- (μ i) (μ (1- i)))
(- (μ (1+ i)) (μ i)))))
(make-kde :kernels (map '(vector normal)
(λ (i) (normal (μ i) (σ i)))
(iota n))
:min min :max max))))
(defmethod sample ((kde kde))
"Sample one random value from kernel density estimator KDE."
(with-slots (min max kernels) kde
(ematch (random-elt kernels)
((normal μ σ) (clamp (rnorm μ σ) min max)))))
(defmethod density ((kde kde) x)
"Return the probability density point estimate for X in KDE."
(with-slots (min max kernels) kde
(if (or (< x min) (> x max))
0
(loop for k across kernels
summing (normal-density k x) into sum
finally (return (/ sum (length kernels)))))))
;; Define math operators (+. -. *. /. sqrt. ...) with automatic coercion to double-float.
(defwrappers double-float "~s.")
(defun normal-density (normal x)
"Return the probability density for Normal(μ,σ) at point X."
;; Standard equation e.g. https://en.wikipedia.org/wiki/Normal_distribution
(with-slots (μ σ) normal
(/. (exp. (*. -1/2 (expt. (/. (-. x μ) σ) 2)))
(sqrt. (*. 2 pi))
σ)))
(defun rnorm (μ σ)
"Sample one random number from Normal(μ,σ)."
(loop for u1 = (random 1.0)
for u2 = (random 1.0)
when (> u1 single-float-epsilon)
do (return (+. μ (*. (*. σ (sqrt. (*. -2 (log. u1))))
(cos. (*. pi 2 u2)))))))
;;;; User interface
(defmacro defhyperparameters (name &body parameters)
(let ((docstring (if (stringp (first parameters)) (pop parameters) nil)))
`(defparameter ,name
(make-instance (defclass ,name (hyperparameters)
,(loop for (name type . doc) in parameters
collect `(,name :type ,type :accessor ,name
,@(when doc (cons :documentation doc))))))
,@(ensure-list docstring))))
(defparameter *minimum-random-samples* 20
"Minimum number of random samples before optimization starts.")
(defparameter *gamma* 0.2
"Fraction of samples for the 'good' pile.")
(defparameter *candidates* 5
"Number of alternatives to consider for each parameter's new value.")
(defclass hyperparameters ()
((%samples :initform nil :type list)))
(defun best! (hyperparameters)
"Assign the best known values to HYPERPARAMETERS.
This is the exact parameter set that received the highest score."
(set! hyperparameters
(second (extremum (slot-value hyperparameters '%samples)
#'> :key #'first))))
(defun next! (hyperparameters)
"Assign experimental new values to HYPERPARAMETERS.
This proposes the next parameters to evaluate. See SCORE."
(if (length< (slot-value hyperparameters '%samples) *minimum-random-samples*)
(loop for slot in (sb-mop:class-direct-slots (class-of hyperparameters))
do (setf (slot-value hyperparameters (sb-mop:slot-definition-name slot))
(random-value-from (sb-mop:slot-definition-type slot))))
(design-next-experiment hyperparameters)))
;;;; Machinery for running experiments
(defun set! (hyperparameters param-values)
"Set the slots of HYPERPARAMETERS to alist PARAM-VALUES."
(loop for (param . value) in param-values
do (setf (slot-value hyperparameters param) value)))
(defun design-next-experiment (hyperparameters)
(with-slots (%samples) hyperparameters
(mvlet ((superior inferior (halves (sort %samples #'> :key #'first)
(ceiling (* (length %samples) *gamma*)))))
(set! hyperparameters
(loop for param in (parameter-names hyperparameters)
for type in (parameter-types hyperparameters)
collect (cons param
(optimized type (extract param superior) (extract param inferior))))))))
(defun extract (param samples)
"Extract all values of PARAM from SAMPLES."
(loop for s in samples
collect (assoc-value (second s) param)))
(defun optimized (type superior inferior)
"Return a value of TYPE that is more like SUPERIOR than INFERIOR samples."
(let ((good (make-estimator type superior))
(poor (make-estimator type inferior)))
(flet ((likelihood-ratio (x)
;; https://en.wikipedia.org/wiki/Likelihood-ratio_test
;; ... but note that Rust tpe code takes difference of log likelihood
(/ (density good x) (density poor x))))
(extremum (loop repeat *candidates* collect (sample good))
#'> :key #'likelihood-ratio))))
(defun random-value-from (type)
(destructuring-case type
((double-float min max) (+ min (random (- max min))))
((member &rest objects) (random-elt objects))))
(defun score! (hyperparameters score)
"Assign SCORE to the current values of HYPERPARAMETERS.
Higher scores are better and guide future experimental choices.
This function should be called after CHOOSE-EXPERIMENTAL."
(push (list score
(loop for slot in (sb-mop:class-direct-slots (class-of hyperparameters))
for name = (sb-mop:slot-definition-name slot)
collect (cons name (slot-value hyperparameters name))))
(slot-value hyperparameters '%samples))
score)
(defun parameter-names (hyperparameters)
"Return the list of parameter names (slot names) for HYPERPARAMETERS."
(mapcar #'sb-mop:slot-definition-name (sb-mop:class-direct-slots (class-of hyperparameters))))
(defun parameter-types (hyperparameters)
"Return the list of parameter names (slot names) for HYPERPARAMETERS."
(mapcar #'sb-mop:slot-definition-type (sb-mop:class-direct-slots (class-of hyperparameters))))
;;;; Test case
;;;
;;; Select restricted operands and coefficients to estimate a target value (e.g. PI.)
(defhyperparameters *hyper-test*
"Hyperparameters for test function: A <op1> (B <op2> (<op3> C))."
(a (double-float 1d0 100d0))
(b (double-float 0.001d0 1000d0))
(c (double-float 5.0d0 5.5d0))
(op1 (member + - * /))
(op2 (member + - * /))
(op3 (member exp log)))
(defun hyper-test (&key (goal pi) (n 100))
(setf *hyper-test* (make-instance '*hyper-test*))
(with-slots (a b c op1 op2 op3) *hyper-test*
(dotimes (i n)
(next! *hyper-test*)
(score! *hyper-test* (hyper-test-score goal)))
(best! *hyper-test*)
(hyper-test-score goal)))
(defun hyper-test-score (goal)
(with-slots (a b c op1 op2 op3) *hyper-test*
(let* ((result (funcall op1 a (funcall op2 b (funcall op3 c))))
(diff (abs (- result goal))))
(- diff))))
(defun report (hyperparameters)
(let* ((samples (slot-value hyperparameters '%samples))
(ranked (sort samples #'> :key #'first)))
(format t "~&Score: top ~,3f (mean ~,3f over ~d scores)~%"
(first (first ranked) )
(mean (mapcar #'first ranked))
(length ranked))
(loop for (param . value) in (second (first ranked))
do (format t "~&~a = ~a" param value))
(terpri)))
(defun hyper-test-expression ()
"Return the function-approximation expression chosen by hyper-optimization."
(with-slots (a b c op1 op2 op3) *hyper-test*
`(,op1 ,a (,op2 ,b (,op3 ,c)))))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment