Last active
October 22, 2019 15:59
-
-
Save skeller88/2a37ee0e3a6178ba429b711869b08921 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import dask | |
import dask.array as da | |
from distributed import Client | |
import gcsfs | |
import imageio | |
import numpy as np | |
import time | |
def install():
    """Install the third-party packages the read tasks import.

    The packages are installed with the pip that belongs to the interpreter
    executing this function, so when the function is run on a remote worker
    the packages land in that worker's own environment.

    Raises:
        subprocess.CalledProcessError: if pip exits with a non-zero status.
    """
    # Imported inside the function so it stays self-contained when it is
    # shipped to, and executed on, another process.
    import subprocess
    import sys

    # ``sys.executable -m pip`` targets the running interpreter rather than
    # whichever ``pip`` happens to be first on PATH; ``check_call`` surfaces a
    # failed install instead of silently discarding the exit status.
    subprocess.check_call(
        [sys.executable, "-m", "pip", "install", "gcsfs", "imageio"]
    )
# Address (host:port) of the dask scheduler this script connects to.
SCHEDULER_ADDRESS = '35.197.27.240:8786'

# Connect to the scheduler, then hand ``install`` to ``Client.run`` so the
# required packages are installed before any read task is submitted.
client = Client(SCHEDULER_ADDRESS)
client.run(install)
def read_filenames_from_gcs(filenames):
    """Fetch and decode a batch of TIFF files from GCS into one dask array.

    Every file is downloaded and decoded as soon as this function is called;
    only the returned wrapper is a dask array.

    Args:
        filenames: sized sequence of GCS object paths to read.

    Returns:
        A dask array built from the list of decoded images, with a single
        chunk along the first axis (``len(filenames)``) and chunks of 120 on
        the next two axes.
        NOTE(review): presumably every image is 120x120 -- confirm.
    """
    def load_image(gcs_path):
        # A filesystem handle is created for each file, exactly as before.
        bucket_fs = gcsfs.GCSFileSystem(project='big_earth')
        raw_bytes = bucket_fs.cat(gcs_path)
        decoded = imageio.imread(raw_bytes, 'TIFF')
        return imageio.core.asarray(decoded)

    images = []
    for gcs_path in filenames:
        images.append(load_image(gcs_path))

    chunk_shape = (len(filenames), 120, 120)
    return da.from_array(images, chunks=chunk_shape)
# Driver script: list the image folders, build one path per band, submit the
# reads to the cluster in fixed-size batches, then persist each batch result.
fs = gcsfs.GCSFileSystem(project='big_earth')
filenames = fs.ls("big_earth/raw_rgb/tiff")
# Listed but not used by the rest of this script.
small_filenames = fs.ls("big_earth/raw_test")

start = time.time()
image_paths = []
for path in filenames:
    # NOTE(review): this assumes every listed entry ends with "/" so that
    # ``split('/')[-2]`` is the entry's last path component -- confirm
    # against what ``fs.ls`` actually returns.
    for band in ["B02", "B03", "B04"]:
        image_path = f"{path}{path.split('/')[-2]}_{band}.tif"
        image_paths.append(image_path)
t = time.time()
print('read image filenames', t - start)

st = time.time()
chunk_size = 100
chunks = []
# Step through the paths in strides of ``chunk_size``. ``range`` also yields
# the final, possibly shorter, batch and yields nothing for an empty list, so
# no path is dropped (the previous ``while end < len(...)`` loop skipped the
# last batch and referenced an undefined name when advancing).
for chunk_start in range(0, len(image_paths), chunk_size):
    cst = time.time()
    chunk = image_paths[chunk_start:chunk_start + chunk_size]
    cst1 = time.time()
    # Timings are only printed for the first batch to keep the output short.
    if chunk_start == 0:
        print('loaded chunk in', cst1 - cst)
    chunks.append(client.submit(read_filenames_from_gcs, chunk))
    if chunk_start == 0:
        print('submitted chunk in', time.time() - cst1)
print('completed in', time.time() - st)

persisted_chunks = []
start = time.time()
for idx, chunk in enumerate(chunks):
    startc = time.time()
    # NOTE(review): ``chunk.result()`` waits for the future and brings its
    # value back to this process before ``persist`` hands it to the cluster
    # again -- confirm that round trip is intended.
    persisted_chunks.append(client.persist(chunk.result()))
    if idx == 0:
        print('submitted chunk in', time.time() - startc)
print('submitted all chunks in', time.time() - start)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment