Last active
December 26, 2015 20:19
-
-
Save twolfe18/7207779 to your computer and use it in GitHub Desktop.
optimization package interfaces
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
trait Func0[V, P] { | |
def value(p: P): V | |
def dimension: Int | |
} | |
trait Func1[V, G, P] extends Func0[V, P] { | |
def grad(p: P): G = compute(p)._2 | |
def compute(p: P): (V, G) | |
} | |
trait Minimizer0[V, P] { def minimize(f: Func0[V, P]): P } | |
trait Minimizer1[V, G, P] { def minimize(f: Func1[V, G, P]): P } | |
class SGD(val learningRate: Double = 0.1d, val tol: Double = 1e-6, val doLineSearch: Boolean = false) | |
extends Minimizer1[Double, Array[Double], Array[Double]] { | |
lazy val ls = new LineSearch | |
override def minimize(f: Func1[Double, Array[Double], Array[Double]]): Array[Double] = { | |
val p = Array.ofDim[Double](f.dimension) | |
var progress = 999999d | |
var vPrev = 999999d | |
while(progress > tol) { | |
val (v, g) = f.compute(p) | |
if(doLineSearch) | |
p = ls.search(f, p, g) | |
else | |
p -= learningRate * g | |
progress = vPrev - v | |
vPrev = v | |
} | |
p | |
} | |
} | |
class LineSearch { | |
def search(f: Func0[Double, Array[Double]], starting: Array[Double], direction: Array[Double]): Array[Double] = { | |
val p = Arrays.copyOf(starting) | |
var step = 1d | |
var best = (99999d, p) | |
while(step > 1e-6) { | |
val v = f.value(p) | |
if(v < best._1) | |
best = (v, p.copy) | |
step *= 0.3d | |
p = starting + step * direction | |
} | |
best._2 | |
} | |
} | |
// this class has ancillary data that it hides | |
// this data is added with promote | |
trait HidesState[-LessInfo, +MoreInfo] { | |
def promote(x: LessInfo): MoreInfo | |
def demote(x: MoreInfo): LessInfo | |
} | |
// if you don't need conversions (e.g. no batches) | |
implicit def nothingToHide[T] = new HidesState[T, T] { | |
def promote(t: T): T = t | |
def demote(t: T): T = t | |
} | |
class Minimizer1Scala[V, G, P, R] { | |
def minimize(f: Func1[V, G, P])(implicit buildPoint: HidesState[R, P]): R | |
} | |
class SGDScala extends Minimizer1Scala[Double, Array[Double], PointWithBatch, Array[Double]] { | |
def minimize(f: Func1[Double, Array[Double], PointWithBatch]) | |
(implicit batch: HidesState[Array[Double], PointWithBatch]): Array[Double] = { | |
// in this example, batch stores the state that is in PointWithBatch | |
// that isn't just the Array[Double]. | |
// we can build the Array[Double] by just knowing the dimension of the function | |
val p: Array[Double] = Array.ofDim[Double](f.dimension) | |
// the function needs to know the batch, so we let batch annotate | |
// the Array[Double] that we made | |
val pWithBatch: PointWithBatch = batch.promote(p) | |
val (v, g) = f.compute(pWithBatch) | |
// this allows us to resolve the seemingly conflicting requirements: | |
// 1) the function needs to take a batch | |
// 2) this class needs to instantiate new points as Array[Double] | |
// when we finally find a PointWithBatch we like, we return | |
val r: Array[Double] = batch.demote(pWithBatch) | |
return r | |
// ok, i believe we have officially hit over-engineering | |
} | |
} | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// decide on one of these, but is sort of orthogonal to this code | |
type Solution = Vector | |
type Solution = (Vector, Double) // solution + f(solution) | |
trait Optimizer { | |
def optimize(problem: OptimizationProblem): Solution | |
} | |
/** | |
* convenient methods for creating OptimizationProblems | |
*/ | |
object OptimizationProblem { | |
def maximize(f: Func): OptimizationProblem[Func] | |
def minimize(f: Func): OptimizationProblem[Func] | |
} | |
trait OptimizationProblem[F <: Func] { | |
def objective: F | |
def constraints: Seq[Constraint] | |
// choose one of the following | |
// i'm biased towards the first because it's more general | |
def compare(solA: Solution, solB: Solution): Double | |
def shouldMaximize: Boolean | |
def timeLimit: Option[Time] | |
def memLimit: Option[Memory] | |
def cpuLimit: Option[Int] | |
// etc | |
} | |
/** | |
* for a new Optimizer that you implement, | |
* if you want to tie into the testing code, you | |
* need to implement one of these | |
*/ | |
trait OptimizerFactory { | |
/** | |
* can inspect the problem to set parameters | |
* e.g. maybe having short time limit means you'll set some Optimizer settings differently | |
*/ | |
def defaultOptimizer(problem: OptimizationProblem): Optimizer | |
/** | |
* return a grid of parameters that you think will work for this problem | |
*/ | |
def gridOfOptimizers(problem: OptimizatinoProblem, maxOptimizers: Int): Seq[Optimzer] | |
} | |
/** | |
* TODO: add logging to show which settings are doing well | |
* this class might dispatch optimization jobs to different grid machines! | |
* this class might also be a nice home for GP code | |
* (in which case we might change the name to something like MetaOptimzer) | |
* actually probably better to describe the GP stuff as an Optimizer itself (over hyper-params) | |
* and keep the semantics of this class purely for calling Optimzers (e.g. necessarily the "top") | |
*/ | |
trait Sweeper { | |
def sweep(problem: OptimizationProblem, optFactory: OptimizationFactory, n: Int = 10): (Solution, Optimizer) = | |
sweep(problem, optFactory.gridOfOptimizers(problem, n)) | |
def sweep(problem: OptimizationProblem, optimizers: Seq[Optimizer]): (Solution, Optimizer) = { | |
var best: (Solution, Optimizer) = null | |
for(opt <- optimizers) { | |
val sol = opt.optimize(problem) | |
if(best == null || problem.compare(sol, best) > 0d) | |
best = (sol, opt) | |
} | |
return best | |
} | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment