Last active
August 10, 2022 04:08
-
-
Save sqybi/c779e8ada9803321a29c3a84fd995c15 to your computer and use it in GitHub Desktop.
Calculate PageRank
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Implementation
import numpy as np | |
def calculate_pagerank(data, d=0.85):
    """Return the PageRank vector for the square link matrix *data*.

    Solves the closed-form PageRank equation

        (I - d * M) * pr = (1 - d) / n * 1

    where ``M`` is *data*, ``n`` is its size and ``1`` is the all-ones
    vector.

    Args:
        data: Square n x n matrix (nested sequence or array-like).
            Presumably ``data[i][j]`` is the normalised weight of the link
            from page ``j`` to page ``i`` (columns summing to 1) -- confirm
            against the caller; the function itself does not check this.
        d: Damping factor.

    Returns:
        A list of ``n`` floats, the PageRank of each page.

    Raises:
        ValueError: If *data* is not a square two-dimensional matrix.
        numpy.linalg.LinAlgError: If ``I - d * M`` is singular.
    """
    # atleast_2d promotes scalars and 1-D input to a single-row 2-D array,
    # the same way np.matrix would.
    matrix_m = np.atleast_2d(np.asarray(data))
    # Explicit raise instead of assert: asserts vanish under ``python -O``.
    if matrix_m.ndim != 2 or matrix_m.shape[0] != matrix_m.shape[1]:
        raise ValueError(
            "data must be a square matrix, got shape %s" % (matrix_m.shape,)
        )
    n = matrix_m.shape[0]
    teleport = np.full(n, (1 - d) / n)
    # Solving the linear system is more accurate and cheaper than forming
    # the explicit inverse and multiplying.
    pagerank = np.linalg.solve(np.eye(n) - d * matrix_m, teleport)
    return pagerank.tolist()
# Unit test
import random | |
def calculate_pagerank_test():
    """Check calculate_pagerank on a random column-normalised matrix.

    Builds a random 10 x 10 matrix whose columns each sum to 1, runs
    ``calculate_pagerank`` on it, and verifies that the result is a fixed
    point of one PageRank iteration ``pr = d * M * pr + (1 - d) / n`` and
    that the ranks sum to 1.

    Raises:
        AssertionError: If either check fails.
    """
    # Constants
    d = 0.85
    n = 10

    # Generate test data. Each row must be its own list: ``[[0] * n] * n``
    # would alias one row n times, and the normalisation below would then
    # collapse the whole matrix to zeros, making the test vacuous.
    data = [[random.random() for _ in range(n)] for _ in range(n)]
    column_totals = [sum(row[j] for row in data) for j in range(n)]
    for j in range(n):
        # Normalise the first n - 1 entries of the column and give the last
        # row the remainder, so the column sums to 1.
        remainder = 1.0
        for i in range(n - 1):
            data[i][j] /= column_totals[j]
            remainder -= data[i][j]
        data[n - 1][j] = remainder

    # Run PR algorithm
    pr = calculate_pagerank(data, d)

    # Verify: one more iteration must reproduce the same vector.
    pr_column = np.array(pr).reshape(n, 1)
    pr_next_iter = d * np.dot(np.array(data), pr_column) + (1 - d) / n * np.ones((n, 1))
    pr_next = pr_next_iter.ravel().tolist()
    print(pr)
    print(pr_next)
    # Floating-point results differ in the last bits, so compare with a
    # tolerance rather than exact equality.
    assert np.allclose(pr, pr_next)
    # With columns summing to 1 the ranks themselves must sum to 1.
    assert abs(sum(pr) - 1.0) < 1e-9
    print('Unit test passed!')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment