Skip to content

Instantly share code, notes, and snippets.

@Moollihawkja
Last active July 13, 2018 23:30
Show Gist options
  • Save Moollihawkja/648d49e62ea72f1dfd86 to your computer and use it in GitHub Desktop.
Save Moollihawkja/648d49e62ea72f1dfd86 to your computer and use it in GitHub Desktop.
Creating egg files for Spark dependencies
import os
import zipfile
# Bundle every file under `reference_path` into a single egg (zip) archive and
# register it with Spark so the dependencies are importable on the workers.
#
# Paths are expanded up front: os.path, os.walk and zipfile all treat "~"
# as a literal directory name rather than the user's home directory.
EGG_FILE = os.path.expanduser("~/dependencies.egg")
reference_path = os.path.expanduser("~/crawler_site_packages")

# Template for a pure-Python stub that loads a compiled extension (.so) out of
# the egg. PATH_TO_SO_FILE is substituted with the .so file name, which
# pkg_resources resolves relative to the stub's own package.
so_to_py_template = """
def __bootstrap__():
    global __bootstrap__, __loader__, __file__
    import sys, pkg_resources, imp
    __file__ = pkg_resources.resource_filename(__name__,'PATH_TO_SO_FILE')
    __loader__ = None; del __bootstrap__, __loader__
    imp.load_dynamic(__name__,__file__)
__bootstrap__()
"""

# Mode "w" truncates any existing archive, so no separate delete step is
# needed; the context manager closes the archive even if a write fails.
with zipfile.ZipFile(EGG_FILE, "w") as zipf:
    # Archive names already written. A stub left on disk by a previous run is
    # also seen by os.walk as an ordinary file; tracking names keeps the
    # archive free of duplicate entries.
    archived = set()
    for root, _dirs, files in os.walk(reference_path):
        for f in files:
            abspath = os.path.join(root, f)
            # Path inside the archive, relative to the packages root.
            arcpath = os.path.relpath(abspath, reference_path)
            stem, ext = os.path.splitext(f)
            if ext.lower() == ".so":
                # Swap only the extension: a blanket replace("so", "py")
                # would also mangle names such as "_json.so".
                stub_path = os.path.join(root, stem + ".py")
                stub_arcpath = os.path.splitext(arcpath)[0] + ".py"
                with open(stub_path, "w") as fp:
                    fp.write(so_to_py_template.replace("PATH_TO_SO_FILE", f))
                # The stub must be archived under its own .py name, next to
                # the .so it loads, not under the .so's archive name.
                if stub_arcpath not in archived:
                    zipf.write(stub_path, stub_arcpath)
                    archived.add(stub_arcpath)
            if arcpath not in archived:
                zipf.write(abspath, arcpath)
                archived.add(arcpath)

# NOTE(review): `sc` is not defined in this file; presumably it is the active
# SparkContext supplied by the PySpark shell/notebook - confirm before running
# this as a standalone script.
sc.addPyFile(EGG_FILE)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment