-
-
Save grgcombs/0bf67fb68000d23f7975182c1e36ff89 to your computer and use it in GitHub Desktop.
Univariate Linear Regression
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Cocoa | |
/// A sequence of the numeric values found in a delimited text file.
///
/// The file at `path` is read as UTF-8 and split on whitespace, newlines and
/// commas. Empty fragments are discarded and every remaining token is converted
/// with `NSString.doubleValue`. The values are yielded in file order as one flat
/// list; rows and columns are not kept apart.
class CSVDoubleSequence: SequenceType {
    typealias GeneratorType = IndexingGenerator<Array<Double>>

    /// The path that was passed to `init(path:)`.
    let path: String

    /// Every value parsed from the file, in the order the tokens appeared.
    let values: [Double]

    /// Loads and parses the file at `path`.
    ///
    /// A read error trips an `assert`. If no file contents are available the
    /// sequence is simply empty.
    init(path: String) {
        self.path = path

        var readError: NSError?
        let contents = String.stringWithContentsOfFile(path, encoding: NSUTF8StringEncoding, error: &readError)
        if let e = readError {
            assert(false, "Error opening \(path): \(e.localizedDescription)")
        }

        // Commas are treated exactly like whitespace and line breaks.
        let separators = NSMutableCharacterSet.whitespaceAndNewlineCharacterSet()
        separators.addCharactersInString(",")

        var parsed = [Double]()
        if let text = contents {
            for token in text.componentsSeparatedByCharactersInSet(separators) {
                // Adjacent separators (", " or blank lines) leave empty fragments behind.
                if token != "" {
                    // NOTE(review): NSString.doubleValue is documented to return 0.0 for
                    // text that does not begin with a number, so a header row or a
                    // malformed cell would become 0.0 rather than be rejected - confirm.
                    parsed.append((token as NSString).doubleValue)
                }
            }
        }
        values = parsed
    }

    /// Returns a generator over `values`.
    func generate() -> IndexingGenerator<[Double]> {
        return values.generate()
    }
}
/// Returns the mean of the squared differences between `predictions` and `values`.
///
/// `predictions[i]` is compared with `values[i]`. If the arrays differ in length,
/// only the first `min(predictions.count, values.count)` pairs are compared, and
/// the mean is taken over exactly those pairs.
///
/// - parameter predictions: Predicted outputs.
/// - parameter values: Observed outputs, in the same order as `predictions`.
/// - returns: The mean squared error, or `0.0` when there is no pair to compare.
func meanSquaredError(predictions: [Double], values: [Double]) -> Double {
    let pairCount = min(predictions.count, values.count)
    // Without this guard an empty input would evaluate 0.0 / 0.0 and return NaN.
    if pairCount == 0 {
        return 0.0
    }

    var squaredErrorSum = 0.0
    for i in 0..<pairCount {
        let diff = predictions[i] - values[i]
        squaredErrorSum += diff * diff
    }
    // Divide by the number of pairs that were summed, not by predictions.count,
    // so the result is still a mean when the two arrays have different lengths.
    return squaredErrorSum / Double(pairCount)
}
/// Fits the line `y = w0 + w1 * x` to paired samples using the closed-form
/// least-squares formulas.
///
/// `x[i]` is paired with `y[i]`. Only the first `min(x.count, y.count)` samples
/// are used, so the sample count and every sum describe the same set of pairs.
///
/// - parameter x: Input values.
/// - parameter y: Observed outputs.
/// - returns: `(w0, w1)`, the intercept and the slope. With no samples the result
///   is `(0, 0)`. When the slope denominator `n * sum(x^2) - sum(x)^2` is zero
///   (for example a single sample) the slope is `0` and the intercept is the
///   mean of the paired `y` values.
func learnUnivariateWeights(x: [Double], y: [Double]) -> (Double, Double) {
    let sampleCount = min(x.count, y.count)
    if sampleCount == 0 {
        return (0.0, 0.0)
    }

    // Accumulate all four sums in one pass over the same pairs.
    var sumX = 0.0
    var sumY = 0.0
    var sumXY = 0.0
    var sumXsquared = 0.0
    for i in 0..<sampleCount {
        sumX += x[i]
        sumY += y[i]
        sumXY += x[i] * y[i]
        sumXsquared += x[i] * x[i]
    }

    let n = Double(sampleCount)
    let denominator = (n * sumXsquared) - (sumX * sumX)
    // A zero denominator would make the slope NaN or infinite; fall back to the
    // horizontal line through the mean of y instead.
    if denominator == 0.0 {
        return (sumY / n, 0.0)
    }

    let w1 = ((n * sumXY) - (sumX * sumY)) / denominator
    let w0 = (sumY - (w1 * sumX)) / n
    return (w0, w1)
}
/// Builds a prediction function for a linear model with the given weights.
///
/// The returned function computes the sum of `weights[i] * x[i]` over the
/// positions that both arrays have; any surplus elements on either side are
/// ignored.
func linearRegression(weights: [Double]) -> ([Double]) -> (Double) {
    func predict(features: [Double]) -> Double {
        // The caller is expected to pass 1 as the first feature so that
        // weights[0] acts as the intercept term.
        let sharedCount = min(weights.count, features.count)
        var total = 0.0
        for i in 0..<sharedCount {
            total += weights[i] * features[i]
        }
        return total
    }
    return predict
}
// Load the data sets used for fitting and for evaluation.
// NOTE(review): `manager` is not referenced anywhere in the code shown here.
let manager = NSFileManager.defaultManager()

let trainingPath = "/Users/jarsen/Desktop/learning.playground/Resources/univariate_data-train.csv"
let trainingData = CSVDoubleSequence(path: trainingPath)

// NOTE(review): this is the same file as the training set, so the error computed
// later is measured on the data the model was fitted to. Confirm whether a
// separate test file was intended.
let testPath = "/Users/jarsen/Desktop/learning.playground/Resources/univariate_data-train.csv"
let testData = CSVDoubleSequence(path: testPath)
/// Splits a flat run of values into two columns.
///
/// Values are taken alternately: the 1st, 3rd, 5th, ... go to `x` and the
/// 2nd, 4th, 6th, ... go to `y`. If `data` holds an odd number of values,
/// `x` ends up one element longer than `y`.
func processTwoColumns(data: CSVDoubleSequence) -> (x: [Double], y: [Double]) {
    var firstColumn = [Double]()
    var secondColumn = [Double]()
    // Tracks which column the next value belongs to; flips after every value.
    var nextGoesToFirst = true
    for value in data {
        if nextGoesToFirst {
            firstColumn.append(value)
        } else {
            secondColumn.append(value)
        }
        nextGoesToFirst = !nextGoesToFirst
    }
    return (firstColumn, secondColumn)
}
// Split each data set into its x and y columns.
let trainingColumns = processTwoColumns(trainingData)
let trainX = trainingColumns.x
let trainY = trainingColumns.y
let testColumns = processTwoColumns(testData)
let testX = testColumns.x
let testY = testColumns.y

// Fit the two weights on the training columns and wrap them in a predictor.
let weights = learnUnivariateWeights(trainX, trainY)
let learnedFunction = linearRegression([weights.0, weights.1])

// Predict every test input; the leading 1 is multiplied by the first weight.
let predictions = testX.map({ learnedFunction([1, $0]) })

// Mean squared error of the predictions against the test targets.
// The result of this expression is not assigned to anything.
meanSquaredError(predictions, testY)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment