gilesc · May 24, 2021 22:04
diff --git a/lfcache.py b/lfcache.py
 import atexit
 import codecs
 import functools
 import gzip
 import os.path
 import shutil
 import sys
 import tempfile
 import urllib.request

 import diskcache

 # Large constant passed both as the per-item expiry and as the cache timeout.
 # NOTE(review): in diskcache `timeout` is the SQLite connection timeout, not
 # an expiry -- confirm that reusing this value there is intended.
 EXPIRE_TIME = 99999999999
 # The store is an "lfcache" directory located beside this module.
 DISK_CACHE_PATH = os.path.join(os.path.dirname(__file__), "lfcache")
 cache = diskcache.FanoutCache(
     DISK_CACHE_PATH,
     size_limit=500 * 10**9,
     cull_limit=0,
     timeout=EXPIRE_TIME,
 )

 # cache.memoize with the tag and expiry arguments pre-filled.
 memoize = functools.partial(cache.memoize, expire=EXPIRE_TIME, tag="memoize")

 def download(URL):
    """Return a binary file handle for the content at URL, via the disk cache.

    On a cache miss the resource is fetched with
    ``urllib.request.urlretrieve`` into a temporary directory and stored in
    ``cache`` under the key ``URL`` (tag ``"download"``, expiry
    ``EXPIRE_TIME``). The handle is then always read back from the cache.

    Parameters:
        URL: location to fetch; also used verbatim as the cache key.

    Returns:
        The object returned by ``cache.get(URL, read=True)``, wrapped in a
        ``gzip.GzipFile`` opened for reading when ``URL`` ends with ".gz".

    Raises:
        Whatever ``urllib.request.urlretrieve`` or the cache raise; nothing
        is caught here.
    """
    if URL not in cache:
        print(f"Cache miss: {URL}", file=sys.stderr)
        # The context manager removes the directory on exit, even when the
        # download raises, so no per-call atexit hook is registered (one hook
        # per miss would only accumulate callbacks until interpreter exit).
        with tempfile.TemporaryDirectory() as workdir:
            target = os.path.join(workdir, "download.bin")
            urllib.request.urlretrieve(URL, target)
            with open(target, "rb") as h:
                # read=True: the value is given as an open binary handle.
                cache.set(URL, h, read=True, tag="download", expire=EXPIRE_TIME)
    o = cache.get(URL, read=True)
    if URL.endswith(".gz"):
        return gzip.GzipFile(fileobj=o, mode="rb")
    return o

 def download_text(URL, encoding="utf-8"):
    """Return a text reader over the content fetched by ``download(URL)``.

    The handle from ``download`` is wrapped in the stream reader class that
    ``codecs.getreader`` supplies for ``encoding`` (default "utf-8").
    """
    # Fetch first, then look up the codec, so the order of effects is fixed.
    raw = download(URL)
    return codecs.getreader(encoding)(raw)

 def download_table(uri, **kwargs):
    """Parse the content fetched by ``download(uri)`` with pandas.

    Extra keyword arguments are forwarded unchanged to
    ``pandas.read_table``, whose result is returned.
    """
    # Imported inside the function, so pandas is only loaded on first call.
    import pandas
    source = download(uri)
    return pandas.read_table(source, **kwargs)
	import atexit
	import codecs
	import functools
	import gzip
	import os.path
	import shutil
	import sys
	import tempfile
	import urllib.request

	import diskcache

	# Large constant passed both as the per-item expiry and as the cache timeout.
	# NOTE(review): in diskcache `timeout` is the SQLite connection timeout, not
	# an expiry -- confirm that reusing this value there is intended.
	EXPIRE_TIME = 99999999999
	# The store is an "lfcache" directory located beside this module.
	DISK_CACHE_PATH = os.path.join(os.path.dirname(__file__), "lfcache")
	cache = diskcache.FanoutCache(
	    DISK_CACHE_PATH,
	    size_limit=500 * 10**9,
	    cull_limit=0,
	    timeout=EXPIRE_TIME,
	)

	# cache.memoize with the tag and expiry arguments pre-filled.
	memoize = functools.partial(cache.memoize, expire=EXPIRE_TIME, tag="memoize")

	def download(URL):
	    """Return a binary file handle for the content at URL, via the disk cache.

	    On a cache miss the resource is fetched with
	    ``urllib.request.urlretrieve`` into a temporary directory and stored in
	    ``cache`` under the key ``URL`` (tag ``"download"``, expiry
	    ``EXPIRE_TIME``). The handle is then always read back from the cache.

	    Parameters:
	        URL: location to fetch; also used verbatim as the cache key.

	    Returns:
	        The object returned by ``cache.get(URL, read=True)``, wrapped in a
	        ``gzip.GzipFile`` opened for reading when ``URL`` ends with ".gz".

	    Raises:
	        Whatever ``urllib.request.urlretrieve`` or the cache raise; nothing
	        is caught here.
	    """
	    if URL not in cache:
	        print(f"Cache miss: {URL}", file=sys.stderr)
	        # The context manager removes the directory on exit, even when the
	        # download raises, so no per-call atexit hook is registered (one hook
	        # per miss would only accumulate callbacks until interpreter exit).
	        with tempfile.TemporaryDirectory() as workdir:
	            target = os.path.join(workdir, "download.bin")
	            urllib.request.urlretrieve(URL, target)
	            with open(target, "rb") as h:
	                # read=True: the value is given as an open binary handle.
	                cache.set(URL, h, read=True, tag="download", expire=EXPIRE_TIME)
	    o = cache.get(URL, read=True)
	    if URL.endswith(".gz"):
	        return gzip.GzipFile(fileobj=o, mode="rb")
	    return o

	def download_text(URL, encoding="utf-8"):
	    """Return a text reader over the content fetched by ``download(URL)``.

	    The handle from ``download`` is wrapped in the stream reader class that
	    ``codecs.getreader`` supplies for ``encoding`` (default "utf-8").
	    """
	    # Fetch first, then look up the codec, so the order of effects is fixed.
	    raw = download(URL)
	    return codecs.getreader(encoding)(raw)

	def download_table(uri, **kwargs):
	    """Parse the content fetched by ``download(uri)`` with pandas.

	    Extra keyword arguments are forwarded unchanged to
	    ``pandas.read_table``, whose result is returned.
	    """
	    # Imported inside the function, so pandas is only loaded on first call.
	    import pandas
	    source = download(uri)
	    return pandas.read_table(source, **kwargs)