-
Star
(241)
You must be signed in to star a gist -
Fork
(39)
You must be signed in to fork a gist
-
-
Save johnhw/dfc7b8b8519aac530ac97da226c17bd5 to your computer and use it in GitHub Desktop.
| ### JHW 2018 | |
| import numpy as np | |
| import umap | |
| # This code from the excellent module at: | |
| # https://stackoverflow.com/questions/4643647/fast-prime-factorization-module | |
| import random | |
| _known_factors = {} | |
| totients = {} | |
| def primesbelow(N): | |
| # http://stackoverflow.com/questions/2068372/fastest-way-to-list-all-primes-below-n-in-python/3035188#3035188 | |
| #""" Input N>=6, Returns a list of primes, 2 <= p < N """ | |
| correction = N % 6 > 1 | |
| N = {0:N, 1:N-1, 2:N+4, 3:N+3, 4:N+2, 5:N+1}[N%6] | |
| sieve = [True] * (N // 3) | |
| sieve[0] = False | |
| for i in range(int(N ** .5) // 3 + 1): | |
| if sieve[i]: | |
| k = (3 * i + 1) | 1 | |
| sieve[k*k // 3::2*k] = [False] * ((N//6 - (k*k)//6 - 1)//k + 1) | |
| sieve[(k*k + 4*k - 2*k*(i%2)) // 3::2*k] = [False] * ((N // 6 - (k*k + 4*k - 2*k*(i%2))//6 - 1) // k + 1) | |
| return [2, 3] + [(3 * i + 1) | 1 for i in range(1, N//3 - correction) if sieve[i]] | |
| smallprimeset = set(primesbelow(1000000)) | |
| _smallprimeset = 1000000 | |
| smallprimes = primesbelow(10000000) | |
| prime_ix = {p:i for i,p in enumerate(smallprimes)} | |
| def isprime(n, precision=7): | |
| # http://en.wikipedia.org/wiki/Miller-Rabin_primality_test#Algorithm_and_running_time | |
| if n < 1: | |
| raise ValueError("Out of bounds, first argument must be > 0") | |
| elif n <= 3: | |
| return n >= 2 | |
| elif n % 2 == 0: | |
| return False | |
| elif n < _smallprimeset: | |
| return n in smallprimeset | |
| d = n - 1 | |
| s = 0 | |
| while d % 2 == 0: | |
| d //= 2 | |
| s += 1 | |
| for repeat in range(precision): | |
| a = random.randrange(2, n - 2) | |
| x = pow(a, d, n) | |
| if x == 1 or x == n - 1: continue | |
| for r in range(s - 1): | |
| x = pow(x, 2, n) | |
| if x == 1: return False | |
| if x == n - 1: break | |
| else: return False | |
| return True | |
| # https://comeoncodeon.wordpress.com/2010/09/18/pollard-rho-brent-integer-factorization/ | |
| def pollard_brent(n): | |
| if n % 2 == 0: return 2 | |
| if n % 3 == 0: return 3 | |
| y, c, m = random.randint(1, n-1), random.randint(1, n-1), random.randint(1, n-1) | |
| g, r, q = 1, 1, 1 | |
| while g == 1: | |
| x = y | |
| for i in range(r): | |
| y = (pow(y, 2, n) + c) % n | |
| k = 0 | |
| while k < r and g==1: | |
| ys = y | |
| for i in range(min(m, r-k)): | |
| y = (pow(y, 2, n) + c) % n | |
| q = q * abs(x-y) % n | |
| g = gcd(q, n) | |
| k += m | |
| r *= 2 | |
| if g == n: | |
| while True: | |
| ys = (pow(ys, 2, n) + c) % n | |
| g = gcd(abs(x - ys), n) | |
| if g > 1: | |
| break | |
| return g | |
| def _primefactors(n, sort=False): | |
| factors = [] | |
| for checker in smallprimes: | |
| while n % checker == 0: | |
| factors.append(checker) | |
| n //= checker | |
| # early exit memoization | |
| if n in _known_factors: | |
| return factors + _known_factors[n] | |
| if checker > n: break | |
| if n < 2: return factors | |
| while n > 1: | |
| if isprime(n): | |
| factors.append(n) | |
| break | |
| factor = pollard_brent(n) # trial division did not fully factor, switch to pollard-brent | |
| factors.extend(primefactors(factor)) # recurse to factor the not necessarily prime factor returned by pollard-brent | |
| n //= factor | |
| if sort: factors.sort() | |
| return factors | |
| def primefactors(n, sort=False): | |
| if n in _known_factors: | |
| return _known_factors[n] | |
| result = _primefactors(n) | |
| _known_factors[n] = result | |
| return result | |
| from collections import defaultdict | |
| def factorization(n): | |
| factors = defaultdict(int) | |
| for p1 in primefactors(n): | |
| factors[p1] += 1 | |
| return factors | |
| def unique_factorise(n): | |
| return set(primefactors(n)) | |
| def totient(n): | |
| if n == 0: return 1 | |
| try: return totients[n] | |
| except KeyError: pass | |
| tot = 1 | |
| for p, exp in factorization(n).items(): | |
| tot *= (p - 1) * p ** (exp - 1) | |
| totients[n] = tot | |
| return tot | |
| def gcd(a, b): | |
| if a == b: return a | |
| while b > 0: a, b = b, a % b | |
| return a | |
| def lcm(a, b): | |
| return abs((a // gcd(a, b)) * b) | |
| ### end | |
| ## Create sparse binary factor vectors for any number, and assemble into a matrix | |
| ## One column for each unique prime factor | |
| ## One row for each number, 0=does not have this factor, 1=does have this factor (might be repeated) | |
| from scipy.special import expi | |
| import scipy.sparse | |
| def factor_vector_lil(n): | |
| ## approximate prime counting function (upper bound for the values we are interested in) | |
| ## gives us the number of rows (dimension of our space) | |
| d = int(np.ceil(expi(np.log(n)))) | |
| x = scipy.sparse.lil_matrix((n,d)) | |
| for i in range(2,n): | |
| for k,v in factorization(i).items(): | |
| x[i,prime_ix[k]] = 1 | |
| if i%100000==0: # just check it is still alive... | |
| print(i) | |
| return x | |
| ### Generate the matrix for 1 million integers | |
| n = 1_000 | |
| X = factor_vector_lil(n) | |
| # embed with UMAP | |
| embedding = umap.UMAP(metric='cosine', n_epochs=500).fit_transform(X) | |
| # save for later | |
| np.savez('1e6_pts.npz', embedding=embedding) | |
| # and save the image | |
| from matplotlib import pyplot as plt | |
| fig = plt.figure(figsize=(8,8)) | |
| fig.patch.set_facecolor('black') | |
| plt.scatter(embedding[:,0], embedding[:,1], marker='o', s=0.005, edgecolor='', | |
| c=np.arange(n), cmap="magma") | |
| plt.axis("off") | |
| plt.savefig("primes_umap_1e6_16k_smaller_pts.png", dpi=2000, facecolor='black') |
Hi. Been trying to reproduce this via ubuntu (in windows 10), but heck, am stuck getting past this:
my virtualenv looks like this:
blessings==1.7
bpython==0.17.1
curtsies==0.3.0
Django==1.10.5
django-appconf==1.0.2
django-compressor==2.1.1
django-leaflet-storage==0.8.2
greenlet==0.4.14
numpy==1.15.1
oauthlib==2.1.0
olefile==0.45.1
Pillow==4.0.0
pkg-resources==0.0.0
psycopg2==2.6.2
Pygments==2.2.0
PyJWT==1.6.4
python-openid==2.2.5
rcssmin==1.0.6
requests==2.13.0
requests-oauthlib==1.0.0
rjsmin==1.0.12
scipy==1.1.0
six==1.11.0
social-auth-app-django==1.1.0
social-auth-core==1.7.0
typing==3.6.4
umap-project==0.8.3
wcwidth==0.1.7
(( note: there are some bpython deps in there ))
What's the right package/provider for umap.UMAP ?
How do I resolve this?
pip install umap-learn rather than pip install umap
This is so great! I wanted to animate it through the integers so I wrote this (rewrite the f-strings if you have Python < 3.6):
step = 500
frame_num = int(n/step)
fname_prefix = 'primes_umap_2k_smaller_pts_'
print("rendering", frame_num, "frames with", step, "integers per frame")
for frame_n in range(1, frame_num+1):
_n = frame_n * step
fig = plt.figure(figsize=(8,8))
fig.patch.set_facecolor('black')
plt.scatter(embedding[0:_n,0], embedding[0:_n,1], s=0.005, c=np.arange(_n), cmap='magma', marker='o')
plt.axis("off")
plt.savefig(f"frames/{fname_prefix}{frame_n}.png", dpi=250, facecolor='black')
plt.close(fig)
print(f"rendered frame {frame_n}/{frame_num}", end='\r')
And used ffmpeg to make it into a video:
ffmpeg -ss 1 -t 200 -i frames\primes_umap_2k_smaller_pts_%d.png -c:v libx264 -vf fps=25 -pix_fmt yuv420p out.mp4


Are the blobs in the top left corner of the opengl image the prime numbers? Awesome viz, btw!