{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "step 100; loss=2.077; delta=4.287\n",
      "step 200; loss=0.826; delta=1.251\n",
      "step 300; loss=0.500; delta=0.326\n",
      "step 400; loss=0.429; delta=0.071\n",
      "step 500; loss=0.441; delta=0.012\n",
      "step 600; loss=0.476; delta=0.035\n",
      "step 700; loss=0.512; delta=0.036\n",
      "step 800; loss=0.544; delta=0.032\n",
      "step 900; loss=0.571; delta=0.027\n",
      "step 1000; loss=0.593; delta=0.022\n",
      "step 1100; loss=0.611; delta=0.018\n",
      "step 1200; loss=0.627; delta=0.016\n",
      "step 1300; loss=0.641; delta=0.014\n",
      "step 1400; loss=0.654; delta=0.013\n",
      "step 1500; loss=0.666; delta=0.012\n",
      "step 1600; loss=0.677; delta=0.011\n",
      "step 1700; loss=0.687; delta=0.011\n",
      "step 1800; loss=0.698; delta=0.011\n",
      "step 1900; loss=0.709; delta=0.011\n",
      "step 2000; loss=0.722; delta=0.012\n",
      "step 2100; loss=0.735; delta=0.013\n",
      "step 2200; loss=0.749; delta=0.014\n",
      "step 2300; loss=0.763; delta=0.014\n",
      "step 2400; loss=0.776; delta=0.013\n",
      "step 2500; loss=0.788; delta=0.012\n",
      "step 2600; loss=0.800; delta=0.011\n",
      "step 2700; loss=0.810; delta=0.010\n",
      "step 2800; loss=0.820; delta=0.010\n",
      "step 2900; loss=0.829; delta=0.009\n",
      "step 3000; loss=0.838; delta=0.009\n",
      "step 3100; loss=0.846; delta=0.009\n",
      "step 3200; loss=0.855; delta=0.008\n",
      "step 3300; loss=0.863; delta=0.008\n",
      "step 3400; loss=0.871; delta=0.008\n",
      "step 3500; loss=0.878; delta=0.008\n",
      "step 3600; loss=0.886; delta=0.007\n",
      "step 3700; loss=0.893; delta=0.007\n",
      "step 3800; loss=0.900; delta=0.007\n",
      "step 3900; loss=0.906; delta=0.007\n",
      "step 4000; loss=0.913; delta=0.006\n",
      "step 4100; loss=0.919; delta=0.006\n",
      "step 4200; loss=0.925; delta=0.006\n",
      "step 4300; loss=0.931; delta=0.006\n",
      "step 4400; loss=0.937; delta=0.006\n",
      "step 4500; loss=0.942; delta=0.006\n",
      "step 4600; loss=0.948; delta=0.005\n",
      "step 4700; loss=0.953; delta=0.005\n",
      "step 4800; loss=0.958; delta=0.005\n",
      "step 4900; loss=0.963; delta=0.005\n",
      "step 5000; loss=0.967; delta=0.004\n",
      "step 5100; loss=0.971; delta=0.004\n",
      "step 5200; loss=0.974; delta=0.004\n",
      "step 5300; loss=0.978; delta=0.003\n",
      "step 5400; loss=0.981; delta=0.003\n",
      "step 5500; loss=0.983; delta=0.003\n",
      "step 5600; loss=0.986; delta=0.002\n",
      "step 5700; loss=0.988; delta=0.002\n",
      "step 5800; loss=0.989; delta=0.002\n",
      "step 5900; loss=0.991; delta=0.002\n",
      "step 6000; loss=0.992; delta=0.001\n",
      "step 6100; loss=0.994; delta=0.001\n",
      "step 6200; loss=0.995; delta=0.001\n",
      "step 6300; loss=0.996; delta=0.001\n",
      "step 6400; loss=0.996; delta=0.001\n",
      "step 6500; loss=0.997; delta=0.001\n",
      "step 6600; loss=0.998; delta=0.001\n",
      "step 6700; loss=0.998; delta=0.000\n",
      "step 6800; loss=0.998; delta=0.000\n",
      "step 6900; loss=0.999; delta=0.000\n",
      "step 7000; loss=0.999; delta=0.000\n",
      "step 7100; loss=0.999; delta=0.000\n",
      "step 7200; loss=0.999; delta=0.000\n",
      "step 7300; loss=1.000; delta=0.000\n",
      "step 7400; loss=1.000; delta=0.000\n",
      "step 7500; loss=1.000; delta=0.000\n",
      "Done: reached target tolerance\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<minifox.MinFoxSolver at 0x7f7a780a5860>"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
"source": [ | |
"from minifox import MinFoxSolver\n", | |
"import matplotlib.pyplot as plt\n", | |
"%matplotlib inline\n", | |
"import numpy as np\n", | |
"import tensorflow as tf\n", | |
"L = tf.keras.layers\n", | |
"\n", | |
"# toy task: random projections\n", | |
"# first, we sample p-dimensional matrix M\n", | |
"# A is constructed as a random projection of M\n", | |
"# B is a random projection of first 4 components of M\n", | |
"# hence the \"right answer\" is to extract 5-th, 6th and 7th components of M from A\n", | |
"\n", | |
"n, p = 1000, 7\n", | |
"\n", | |
"M = np.random.randn(n, p)\n", | |
"A = M.dot(np.random.randn(p, p))\n", | |
"B = M[:, :4].dot(np.random.randn(4, p))\n", | |
"\n", | |
"fox = MinFoxSolver(p=7, max_iters=10 ** 4, tolerance=1e-4,\n", | |
" gen_steps=1, pred_steps=5,\n", | |
" make_generator=lambda: L.Dense(3),\n", | |
" make_predictor=lambda: L.Dense(3),\n", | |
" verbose=True)\n", | |
"\n", | |
"fox.fit(A, B)" | |
] | |
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 1.00000000e+00, -3.81469703e-09, -9.53674258e-09],\n",
       "       [-3.81469703e-09,  1.00000000e+00, -5.72204590e-09],\n",
       "       [-9.53674258e-09, -5.72204590e-09,  1.00000000e+00]])"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "vectors = fox.predict(A=A)\n",
    "\n",
    "cosine = lambda a, b: (a * b).sum() / (a * a).sum() ** 0.5 / (b * b).sum() ** 0.5\n",
    "# orthogonality check: pairwise cosine similarities should form the identity matrix\n",
    "np.array([\n",
    "    [cosine(vectors[:, i], vectors[:, j]) for i in range(3)]\n",
    "    for j in range(3)\n",
    "])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "coefficient of determination from first 4 cols: 0.0002727001693523196\n",
      "coefficient of determination from last 3 cols: 0.9978127415666408\n"
     ]
    }
   ],
   "source": [
    "from sklearn.linear_model import LinearRegression\n",
    "regression = LinearRegression().fit(M[:, :4], vectors)\n",
    "print(\"coefficient of determination from first 4 cols:\", regression.score(M[:, :4], vectors))\n",
    "regression = LinearRegression().fit(M[:, 4:], vectors)\n",
    "print(\"coefficient of determination from last 3 cols:\", regression.score(M[:, 4:], vectors))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
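Why components 5-7 of M are the intended answer: A = M · R_A with R_A a random p×p matrix that is invertible almost surely, so A carries all the information in M, while B = M[:, :4] · R_B only carries the first four components. Below is a minimal standalone sanity-check sketch (not part of the gist; same setup as the notebook cell above) confirming that B linearly determines M[:, :4] but tells nothing about M[:, 4:]:

import numpy as np
from sklearn.linear_model import LinearRegression

n, p = 1000, 7
M = np.random.randn(n, p)
A = M.dot(np.random.randn(p, p))         # invertible map: A carries all of M
B = M[:, :4].dot(np.random.randn(4, p))  # B only sees the first 4 components

# R^2 ~ 1: B linearly determines the first four components of M
print(LinearRegression().fit(B, M[:, :4]).score(B, M[:, :4]))
# R^2 ~ 0: B carries no information about the last three components
print(LinearRegression().fit(B, M[:, 4:]).score(B, M[:, 4:]))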
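The solver itself follows. Read off its update ops, it plays a two-player game (a sketch of the objective inferred from the code, not the author's own notation):

\max_f \Big( \min_g \; \mathbb{E}\,\lVert f(A) - g(B)\rVert^2 \;-\; \mathbb{E}\,\lVert f(A) - \tilde{f}(A)\rVert^2 \Big)

where \tilde{f}(A) is the generator's raw output and f(A) is its column-orthonormalized version rescaled to norm \sqrt{n}; the second term keeps the raw output close to its orthonormalized target.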
import numpy as np
import tensorflow as tf

L = tf.keras.layers


class MinFoxSolver:
    def __init__(self, p, p_b=None, pred_steps=5, gen_steps=1, max_iters=10 ** 5, tolerance=1e-3,
                 optimizer=tf.train.AdamOptimizer(5e-4),
                 make_generator=lambda: L.Dense(1, name='he_who_generates_unpredictable'),
                 make_predictor=lambda: L.Dense(1, name='he_who_predicts_generated_variable'),
                 sess=None, verbose=False, reset_predictor=False, eps=1e-9
                 ):
        """
        Given two matrices A and B, learn a variable f(A) that cannot be predicted from matrix B.
        :param p: last dimension of A
        :param p_b: last dimension of B; defaults to p
        :param pred_steps: predictor g(B) training iterations per one training step
        :param gen_steps: generator f(A) training iterations per one training step
        :param max_iters: maximum number of optimization steps before termination
        :param tolerance: terminate if the loss difference between two consecutive
            100-step checkpoints falls below this value; set to 0 to always run for max_iters steps
        :param optimizer: tf optimizer used for both the generator and the predictor
        :param make_generator: callback that creates a keras model for the target variable generator given A
        :param make_predictor: callback that creates a keras model for the target variable predictor given B
        :param sess: tf session to run in; defaults to the current default session, else a new CPU-only one
        :param verbose: if True, print loss and delta every 100 steps
        :param reset_predictor: if True, re-initializes the predictor network after every step
        :param eps: small numerical-stability constant (reserved; unused in this version)
        /* a teeny-tiny fox cub */
        """
        self.session = sess = sess or tf.get_default_session() \
            or tf.Session(config=tf.ConfigProto(device_count={'GPU': 0}))
        self.pred_steps, self.gen_steps = pred_steps, gen_steps
        self.reset_predictor = reset_predictor
        self.max_iters, self.tolerance = max_iters, tolerance
        self.verbose = verbose

        with sess.as_default(), sess.graph.as_default():
            A = self.A = tf.placeholder(tf.float32, [None, p])
            B = self.B = tf.placeholder(tf.float32, [None, p_b or p])

            self.generator = make_generator()
            self.predictor = make_predictor()
            prediction = self.predictor(B)
            target_raw = self.generator(A)

            # orthonormalize target columns, then rescale each to norm sqrt(n)
            # so that every component has unit variance per element
            target = orthogonalize_columns(target_raw)
            target *= tf.sqrt(tf.to_float(tf.shape(target)[0]))

            self.loss = self.compute_loss(target, prediction)
            # regularizer: keep the generator's raw output close to its orthogonalized version
            self.reg = tf.reduce_mean(tf.squared_difference(target, target_raw))

            # adversarial game: the predictor minimizes the prediction loss,
            # the generator maximizes it, subject to the orthogonality regularizer
            self.update_pred = optimizer.minimize(self.loss, var_list=self.predictor.trainable_variables)
            self.reset_pred = tf.variables_initializer(self.predictor.trainable_variables)
            self.update_gen = optimizer.minimize(-self.loss + self.reg, var_list=self.generator.trainable_variables)

            self.prediction, self.target = prediction, target
    def compute_loss(self, target, prediction):
        """ Mean squared error between the generated target and its prediction from B """
        return tf.reduce_mean(tf.squared_difference(target, prediction))
    def fit(self, A, B):
        sess = self.session
        with sess.as_default(), sess.graph.as_default():
            sess.run(tf.global_variables_initializer())
            feed = {self.A: A, self.B: B}
            prev_loss = sess.run(self.loss, feed)

            for i in range(1, self.max_iters + 1):
                # alternate between training the predictor and the generator
                for j in range(self.pred_steps):
                    sess.run(self.update_pred, feed)
                for j in range(self.gen_steps):
                    sess.run(self.update_gen, feed)

                # every 100 steps, check whether the loss has stopped changing
                if i % 100 == 0:
                    loss_i = sess.run(self.loss, feed)
                    if self.verbose:
                        print("step %i; loss=%.3f; delta=%.3f" % (i, loss_i, abs(prev_loss - loss_i)))
                    if abs(prev_loss - loss_i) < self.tolerance:
                        if self.verbose: print("Done: reached target tolerance")
                        break
                    prev_loss = loss_i

                if self.reset_predictor:
                    sess.run(self.reset_pred)
            else:  # for-else: the loop finished without breaking
                if self.verbose:
                    print("Done: reached max steps")
        return self
    def predict(self, A=None, B=None):
        assert (A is None) != (B is None), "Please use either predict(A=...) or predict(B=...)"
        sess = self.session
        with sess.as_default(), sess.graph.as_default():
            if A is not None:
                return sess.run(self.target, {self.A: A})
            else:
                return sess.run(self.prediction, {self.B: B})

    def get_weights(self):
        return self.session.run({'generator': self.generator.trainable_variables,
                                 'predictor': self.predictor.trainable_variables})
def orthogonalize_rows(matrix):
    """ Gram-Schmidt orthogonalizer; source: https://bit.ly/2FMOp40
    Note: requires a statically known number of rows (matrix.shape[0]). """
    # add batch dimension for matmul
    basis = tf.expand_dims(matrix[0, :] / tf.norm(matrix[0, :]), 0)
    for i in range(1, matrix.shape[0]):
        v = tf.expand_dims(matrix[i, :], 0)  # add batch dimension for matmul
        # subtract the projection of v onto the current basis, then normalize
        w = v - tf.matmul(tf.matmul(v, basis, transpose_b=True), basis)
        basis = tf.concat([basis, w / tf.norm(w)], axis=0)
    return basis


def orthogonalize_columns(matrix):
    return tf.transpose(orthogonalize_rows(tf.transpose(matrix)))
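For reference, the orthogonalizer's behavior can be checked outside the TF graph. A minimal NumPy sketch (np_orthogonalize_columns is a hypothetical mirror using QR, which matches classical Gram-Schmidt up to the sign of each column):

import numpy as np

def np_orthogonalize_columns(matrix):
    # reduced QR yields the same orthonormal column basis as
    # classical Gram-Schmidt, up to the sign of each column
    q, _ = np.linalg.qr(matrix)
    return q

x = np.random.randn(1000, 3)
q = np_orthogonalize_columns(x)
# columns are orthonormal: q^T q is the 3x3 identity
print(np.allclose(q.T @ q, np.eye(3)))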