Created
September 29, 2022 21:26
-
-
Save yjzhang/bc95f99c54a538955f6f6ac89f1000bb to your computer and use it in GitHub Desktop.
very basic node2vec implementation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import functools | |
#from numba import jit | |
import numpy as np | |
from scipy import sparse | |
def random_walks(adj_list, r, l, p=1, q=1, verbose=False):
    """
    Generate node2vec-style biased random walks starting from every node.

    adj_list is an adjacency list: adj_list[i] is the sequence of integer
        neighbor indices of node i (e.g. the `rows` field of a scipy
        lil_array; its `data` field holds the stored values instead).
    r = number of walks started from each node
    l = maximum length (number of nodes) of each walk
    p = return parameter: stepping back to the previous node gets
        unnormalized weight 1/p
    q = in-out parameter: stepping to a node that is not a neighbor of the
        previous node gets unnormalized weight 1/q
    verbose = if True, print progress every 1000 start nodes

    Returns a list of len(adj_list) * r walks, each a list of int node
    indices. A walk is shorter than l when it reaches a node that has no
    neighbors. Edge weights are not used.
    """
    # Loop-invariant: with p == q == 1 every neighbor is equally likely.
    unbiased = p == 1 and q == 1

    @functools.lru_cache(10000)
    def tr_probs(prev, node):
        """
        node2vec transition probabilities for leaving `node` after having
        arrived from `prev`.
        returns array of transition probs indexed to neighbors of `node`.
        The returned array is cached and shared, so it must not be mutated.
        """
        neighbors = adj_list[node]
        transition_probs = np.ones(len(neighbors))
        if unbiased:
            return transition_probs / transition_probs.sum()
        # Build the set once so each membership test below is O(1).
        prev_neighbors = set(adj_list[prev])
        # bias
        for i, t in enumerate(neighbors):
            if t == prev:
                transition_probs[i] = 1. / p
            elif t not in prev_neighbors:
                transition_probs[i] = 1. / q
        transition_probs /= transition_probs.sum()
        return transition_probs

    walks = []
    for start in range(len(adj_list)):
        if verbose and start % 1000 == 0:
            print('Node: ', start)
        for _ in range(r):
            prev_node = None
            node = start
            walk = []
            while len(walk) < l:
                walk.append(node)
                if len(walk) >= l:
                    # Walk is complete; do not sample a node we never use.
                    break
                neighbors = adj_list[node]
                # len() rather than truthiness so numpy arrays work too.
                if len(neighbors) == 0:
                    # Dead end: np.random.choice would raise on an empty
                    # sequence, so end this walk early instead.
                    break
                if prev_node is not None:
                    tr = tr_probs(prev_node, node)
                else:
                    # First step has no previous node: choose uniformly.
                    tr = np.ones(len(neighbors)) / len(neighbors)
                prev_node = node
                # Convert to a plain int so walks hold one consistent type.
                node = int(np.random.choice(neighbors, p=tr))
            walks.append(walk)
    return walks
def run_word2vec(walks, k=10, d=64, **kwargs):
    """
    Uses gensim to run word2vec on a collection of random walks.

    walks = list of random walks, as returned by random_walks.
    k = context size (passed as the word2vec `window` parameter)
    d = output dimensionality (passed as the word2vec `vector_size` parameter)
    kwargs = any further keyword arguments are forwarded unchanged to
        gensim.models.Word2Vec (e.g. min_count, sg, workers, seed).

    Returns the gensim Word2Vec model built from the walks.

    NOTE: gensim's own defaults apply to everything not given here. In
    particular its `min_count` threshold drops rarely-visited nodes from the
    vocabulary; pass min_count=1 to keep an embedding for every node.
    """
    # Imported lazily so the rest of the module works without gensim.
    import gensim.models
    model = gensim.models.Word2Vec(
        sentences=walks,
        vector_size=d,
        window=k,
        **kwargs,
    )
    return model
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment