twolfe18 · December 26, 2015 20:19
diff --git a/gistfile1.scala b/gistfile1.scala
 trait Func0[V, P] {
 	def value(p: P): V
 	def dimension: Int
 }

 trait Func1[V, G, P] extends Func0[V, P] {
 	def grad(p: P): G = compute(p)._2
 	def compute(p: P): (V, G)
 }

 trait Minimizer0[V, P] { def minimize(f: Func0[V, P]): P }
 trait Minimizer1[V, G, P] { def minimize(f: Func1[V, G, P]): P }

 class SGD(val learningRate: Double = 0.1d, val tol: Double = 1e-6, val doLineSearch: Boolean = false)
 		extends Minimizer1[Double, Array[Double], Array[Double]] {
 	lazy val ls = new LineSearch
 	override def minimize(f: Func1[Double, Array[Double], Array[Double]]): Array[Double] = {
 		val p = Array.ofDim[Double](f.dimension)
 		var progress = 999999d
 		var vPrev = 999999d
 		while(progress > tol) {
 			val (v, g) = f.compute(p)
 			if(doLineSearch)
 				p = ls.search(f, p, g)
 			else
 				p -= learningRate * g
 			progress = vPrev - v
 			vPrev = v
 		}
 		p
 	}
 }

 class LineSearch {
 	def search(f: Func0[Double, Array[Double]], starting: Array[Double], direction: Array[Double]): Array[Double] = {
 		val p = Arrays.copyOf(starting)
 		var step = 1d
 		var best = (99999d, p)
 		while(step > 1e-6) {
 			val v = f.value(p)
 			if(v < best._1)
 				best = (v, p.copy)
 			step *= 0.3d
 			p = starting + step * direction
 		}
 		best._2
 	}
 }


 // this class has ancillary data that it hides
 // this data is added with promote
 trait HidesState[-LessInfo, +MoreInfo] {
 	def promote(x: LessInfo): MoreInfo
 	def demote(x: MoreInfo): LessInfo
 }
 // if you don't need conversions (e.g. no batches)
 implicit def nothingToHide[T] = new HidesState[T, T] {
 	def promote(t: T): T = t
 	def demote(t: T): T = t
 }
 class Minimizer1Scala[V, G, P, R] {
 	def minimize(f: Func1[V, G, P])(implicit buildPoint: HidesState[R, P]): R
 }
 class SGDScala extends Minimizer1Scala[Double, Array[Double], PointWithBatch, Array[Double]] {
 	def minimize(f: Func1[Double, Array[Double], PointWithBatch])
 			(implicit batch: HidesState[Array[Double], PointWithBatch]): Array[Double] = {

 		// in this example, batch stores the state that is in PointWithBatch
 		// that isn't just the Array[Double].
 		// we can build the Array[Double] by just knowing the dimension of the function
 		val p: Array[Double] = Array.ofDim[Double](f.dimension)

 		// the function needs to know the batch, so we let batch annotate
 		// the Array[Double] that we made
 		val pWithBatch: PointWithBatch = batch.promote(p)
 		val (v, g) = f.compute(pWithBatch)

 		// this allows us to resolve the seemingly conflicting requirements:
 		// 1) the function needs to take a batch
 		// 2) this class needs to instantiate new points as Array[Double]

 		// when we finally find a PointWithBatch we like, we return
 		val r: Array[Double] = batch.demote(pWithBatch)
 		return r

 		// ok, i believe we have officially hit over-engineering
 	}
 }




diff --git a/gistfile2.scala b/gistfile2.scala
 // decide on one of these, but is sort of orthogonal to this code
 type Solution = Vector
 type Solution = (Vector, Double)	// solution + f(solution)

 trait Optimizer {
 	def optimize(problem: OptimizationProblem): Solution
 }

 /**
 * convenient methods for creating OptimizationProblems
 */
 object OptimizationProblem {
 	def maximize(f: Func): OptimizationProblem[Func]
 	def minimize(f: Func): OptimizationProblem[Func]
 }

 trait OptimizationProblem[F <: Func] {

 	def objective: F

 	def constraints: Seq[Constraint]

 	// choose one of the following
 	// i'm biased towards the first because it's more general
 	def compare(solA: Solution, solB: Solution): Double
 	def shouldMaximize: Boolean

 	def timeLimit: Option[Time]
 	def memLimit: Option[Memory]
 	def cpuLimit: Option[Int]
 	// etc
 }

 /**
 * for a new Optimizer that you implement,
 * if you want to tie into the testing code, you
 * need to implement one of these
 */
 trait OptimizerFactory {

 	/**
 	 * can inspect the problem to set parameters
 	 * e.g. maybe having short time limit means you'll set some Optimizer settings differently
 	 */
 	def defaultOptimizer(problem: OptimizationProblem): Optimizer

 	/**
 	 * return a grid of parameters that you think will work for this problem
 	 */
 	def gridOfOptimizers(problem: OptimizatinoProblem, maxOptimizers: Int): Seq[Optimzer]
 }

 /**
 * TODO: add logging to show which settings are doing well
 * this class might dispatch optimization jobs to different grid machines!
 * this class might also be a nice home for GP code
 * (in which case we might change the name to something like MetaOptimzer)
 * actually probably better to describe the GP stuff as an Optimizer itself (over hyper-params)
 * and keep the semantics of this class purely for calling Optimzers (e.g. necessarily the "top")
 */
 trait Sweeper {
 	def sweep(problem: OptimizationProblem, optFactory: OptimizationFactory, n: Int = 10): (Solution, Optimizer) =
 		sweep(problem, optFactory.gridOfOptimizers(problem, n))
 	def sweep(problem: OptimizationProblem, optimizers: Seq[Optimizer]): (Solution, Optimizer) = {
 		var best: (Solution, Optimizer) = null
 		for(opt <- optimizers) {
 			val sol = opt.optimize(problem)
 			if(best == null || problem.compare(sol, best) > 0d)
 				best = (sol, opt)
 		}
 		return best
 	}
 }
	trait Func0[V, P] {
	def value(p: P): V
	def dimension: Int
	}

	trait Func1[V, G, P] extends Func0[V, P] {
	def grad(p: P): G = compute(p)._2
	def compute(p: P): (V, G)
	}

	trait Minimizer0[V, P] { def minimize(f: Func0[V, P]): P }
	trait Minimizer1[V, G, P] { def minimize(f: Func1[V, G, P]): P }

	class SGD(val learningRate: Double = 0.1d, val tol: Double = 1e-6, val doLineSearch: Boolean = false)
	extends Minimizer1[Double, Array[Double], Array[Double]] {
	lazy val ls = new LineSearch
	override def minimize(f: Func1[Double, Array[Double], Array[Double]]): Array[Double] = {
	val p = Array.ofDim[Double](f.dimension)
	var progress = 999999d
	var vPrev = 999999d
	while(progress > tol) {
	val (v, g) = f.compute(p)
	if(doLineSearch)
	p = ls.search(f, p, g)
	else
	p -= learningRate * g
	progress = vPrev - v
	vPrev = v
	}
	p
	}
	}

	class LineSearch {
	def search(f: Func0[Double, Array[Double]], starting: Array[Double], direction: Array[Double]): Array[Double] = {
	val p = Arrays.copyOf(starting)
	var step = 1d
	var best = (99999d, p)
	while(step > 1e-6) {
	val v = f.value(p)
	if(v < best._1)
	best = (v, p.copy)
	step *= 0.3d
	p = starting + step * direction
	}
	best._2
	}
	}


	// this class has ancillary data that it hides
	// this data is added with promote
	trait HidesState[-LessInfo, +MoreInfo] {
	def promote(x: LessInfo): MoreInfo
	def demote(x: MoreInfo): LessInfo
	}
	// if you don't need conversions (e.g. no batches)
	implicit def nothingToHide[T] = new HidesState[T, T] {
	def promote(t: T): T = t
	def demote(t: T): T = t
	}
	class Minimizer1Scala[V, G, P, R] {
	def minimize(f: Func1[V, G, P])(implicit buildPoint: HidesState[R, P]): R
	}
	class SGDScala extends Minimizer1Scala[Double, Array[Double], PointWithBatch, Array[Double]] {
	def minimize(f: Func1[Double, Array[Double], PointWithBatch])
	(implicit batch: HidesState[Array[Double], PointWithBatch]): Array[Double] = {

	// in this example, batch stores the state that is in PointWithBatch
	// that isn't just the Array[Double].
	// we can build the Array[Double] by just knowing the dimension of the function
	val p: Array[Double] = Array.ofDim[Double](f.dimension)

	// the function needs to know the batch, so we let batch annotate
	// the Array[Double] that we made
	val pWithBatch: PointWithBatch = batch.promote(p)
	val (v, g) = f.compute(pWithBatch)

	// this allows us to resolve the seemingly conflicting requirements:
	// 1) the function needs to take a batch
	// 2) this class needs to instantiate new points as Array[Double]

	// when we finally find a PointWithBatch we like, we return
	val r: Array[Double] = batch.demote(pWithBatch)
	return r

	// ok, i believe we have officially hit over-engineering
	}
	}
	// decide on one of these, but is sort of orthogonal to this code
	type Solution = Vector
	type Solution = (Vector, Double) // solution + f(solution)

	trait Optimizer {
	def optimize(problem: OptimizationProblem): Solution
	}

	/**
	* convenient methods for creating OptimizationProblems
	*/
	object OptimizationProblem {
	def maximize(f: Func): OptimizationProblem[Func]
	def minimize(f: Func): OptimizationProblem[Func]
	}

	trait OptimizationProblem[F <: Func] {

	def objective: F

	def constraints: Seq[Constraint]

	// choose one of the following
	// i'm biased towards the first because it's more general
	def compare(solA: Solution, solB: Solution): Double
	def shouldMaximize: Boolean

	def timeLimit: Option[Time]
	def memLimit: Option[Memory]
	def cpuLimit: Option[Int]
	// etc
	}

	/**
	* for a new Optimizer that you implement,
	* if you want to tie into the testing code, you
	* need to implement one of these
	*/
	trait OptimizerFactory {

	/**
	* can inspect the problem to set parameters
	* e.g. maybe having short time limit means you'll set some Optimizer settings differently
	*/
	def defaultOptimizer(problem: OptimizationProblem): Optimizer

	/**
	* return a grid of parameters that you think will work for this problem
	*/
	def gridOfOptimizers(problem: OptimizatinoProblem, maxOptimizers: Int): Seq[Optimzer]
	}

	/**
	* TODO: add logging to show which settings are doing well
	* this class might dispatch optimization jobs to different grid machines!
	* this class might also be a nice home for GP code
	* (in which case we might change the name to something like MetaOptimzer)
	* actually probably better to describe the GP stuff as an Optimizer itself (over hyper-params)
	* and keep the semantics of this class purely for calling Optimzers (e.g. necessarily the "top")
	*/
	trait Sweeper {
	def sweep(problem: OptimizationProblem, optFactory: OptimizationFactory, n: Int = 10): (Solution, Optimizer) =
	sweep(problem, optFactory.gridOfOptimizers(problem, n))
	def sweep(problem: OptimizationProblem, optimizers: Seq[Optimizer]): (Solution, Optimizer) = {
	var best: (Solution, Optimizer) = null
	for(opt <- optimizers) {
	val sol = opt.optimize(problem)
	if(best == null \|\| problem.compare(sol, best) > 0d)
	best = (sol, opt)
	}
	return best
	}
	}