This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import org.apache.spark.sql.functions._
import spark.implicits._
import sys.process._

// Lists the `.zip` archives in the shapefiles upload directory and derives two columns:
//   path - the archive location with a leading `dbfs:/` rewritten to `/dbfs/`
//   root - `path` with the trailing file name removed (i.e. the containing directory)
//
// The `dbfs:/` pattern is anchored so only the leading scheme is rewritten.
// `root` is computed by length rather than by passing the file name to regexp_replace,
// because a file name used as a regex pattern is misinterpreted when it contains
// metacharacters such as `.`, `(` or `+`.
//
// NOTE(review): the "[email protected]" path segment looks like a redacted address
// from wherever this snippet was published — confirm the real upload directory.
val paths = dbutils.fs.ls("/FileStore/shared_uploads/[email protected]/shapefiles/").toDF
  .select("path", "name")
  .where(col("path").endsWith(".zip"))
  .withColumn("path", regexp_replace($"path", "^dbfs:/", "/dbfs/"))
  .withColumn("root", expr("substring(path, 1, length(path) - length(name))"))
  .drop("name")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys

# Drop any cached copy of the module so the import below re-executes it.
# `pop` is given a default so that a module which has not been imported yet
# does not raise KeyError and cause the import itself to be skipped.
sys.modules.pop("tests.test_advanced", None)
from tests.test_advanced import AdvancedTestSuite
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def plot_graphviz(graph):
    """Render ``graph`` to a PNG file and show it inline as a base64 ``<img>`` tag.

    Args:
        graph: Object exposing ``plot(to_file=<path>)`` that writes an image to
            the given file path.

    The image is written into a private temporary directory and read back by
    path. This works whether the writer fills the file in place or replaces it,
    and does not require reopening a file that is already held open.
    The resulting HTML is passed to ``displayHTML``.
    """
    import os
    from base64 import b64encode
    from tempfile import TemporaryDirectory

    with TemporaryDirectory() as workdir:
        png_path = os.path.join(workdir, "graph.png")
        graph.plot(to_file=png_path)
        # Read by path (not via a pre-opened handle) so we always see the
        # file the writer actually left behind.
        with open(png_path, "rb") as fh:
            img = b64encode(fh.read()).decode("UTF-8")
    displayHTML(f"<img src='data:image/png;base64,{img}'>")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import org.apache.spark.ml.linalg.Vector

// UDF that converts an ML `Vector` value into an array of doubles.
// A null input yields null instead of throwing a NullPointerException inside the UDF.
val toArray = udf { v: Vector => Option(v).map(_.toArray).orNull }

// Register the same UDF under the name "toArray" so it can be called from SQL expressions.
spark.udf.register("toArray", toArray)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@pandas_udf("timestamp", PandasUDFType.SCALAR)
def from_xltime(x):
    """Convert serial day numbers counted from 1899-12-30 into timestamps.

    Presumably these are Excel-style serial dates (going by the function name);
    fractional values represent fractions of a day.

    Args:
        x: pandas Series of numeric day counts.

    Returns:
        pandas Series of timestamps carrying the same index as ``x``.
    """
    import pandas as pd

    # Day zero of the serial numbering, as used by the original arithmetic.
    epoch = pd.Timestamp(1899, 12, 30)
    # pd.to_timedelta replaces the deprecated TimedeltaIndex(..., unit=...) form
    # and keeps the input Series' index on the result.
    return pd.to_timedelta(x, unit="D") + epoch
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Select all rows, with columns reordered: positions 5..end first, then column 1.
data.iloc[:, [*range(5, data.shape[1]), 1]]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import mlflow.pyfunc | |
import mlflow.keras | |
class KerasWrapper(mlflow.pyfunc.PythonModel): | |
def __init__(self, keras_model_name): | |
self.keras_model_name = keras_model_name | |
def load_context(self, context): | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import boto3 | |
import json | |
import numpy as np | |
import pandas as pd | |
from math import ceil | |
class KinesisWriter: | |
def __init__(self, region, stream, classes): | |
self.kinesis_client = None |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from threading import Thread | |
def producer_method(): | |
dbutils.notebook.run( | |
path="./kinesis-producer", | |
timeout_seconds=600, | |
arguments={ | |
"kinesisRegion": KINESIS_REGION, | |
"inputStream": INPUT_STREAM, | |
"newsgroupDataLocation": NEWSGROUP_DATA_PATH |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(dplyr)

# Display a random sample of `x`.
#
# Draws `n` rows from `x` with sample_n(), collects the sample, and passes it
# to display().
#
# x: table-like object accepted by dplyr::sample_n().
# n: number of rows to sample; defaults to 1000, the size previously hard-coded.
sdisplay <- function(x, n = 1000) {
  x %>%
    sample_n(n) %>%
    collect() %>%
    display()
}
NewerOlder