Sample dummy models for testing. The golden rule is that Python models must always return a DataFrame.
# sf_table.py
import pandas as pd
def model(dbt, session):
    """Return a one-row pandas DataFrame with a single ``id`` column.

    Neither ``dbt`` nor ``session`` is used here; they are accepted because
    every sample model shares the ``model(dbt, session)`` signature.
    """
    return pd.DataFrame({"id": [1]})


# Importing another Python package for use in the model:
# sf_table.py
import pandas as pd
def model(dbt, session):
    """Declare the ``agate`` package, import it, and return a one-row DataFrame.

    The package is first declared through ``dbt.config(packages=[...])`` and
    then imported inside the function body. ``session`` is not used.
    """
    dbt.config(packages=["agate"])
    # Function-scope import, placed after the package declaration above.
    import agate  # noqa: F401 -- imported only to demonstrate package usage

    return pd.DataFrame({"id": [1]})


# Returning an empty DataFrame:
# sf_incremental.py
import pandas as pd
def model(dbt, session):
    """Incremental model returning a DataFrame with ``id`` and ``name`` columns.

    When ``dbt.is_incremental`` is truthy the frame is empty but still declares
    both columns; otherwise it contains a single row. ``session`` is not used.
    """
    dbt.config(materialized="incremental")

    if not dbt.is_incremental:
        return pd.DataFrame({"id": [1], "name": ["alice"]})

    # On the subsequent (i.e. incremental) run an empty DataFrame is returned.
    # A bare `pd.DataFrame()` will not work for that: to avoid Snowpark errors
    # the frame must have its columns (column names) defined, so they are
    # spelled out here with empty value lists.
    return pd.DataFrame({"id": [], "name": []})


# bq_table.py
def model(dbt, session):
    """Build a one-row DataFrame through ``session.createDataFrame``.

    The model is configured with ``submission_method="cluster"`` before the
    frame is created from a single ``{"id": 1}`` record.
    """
    dbt.config(submission_method="cluster")
    rows = [{"id": 1}]
    frame = session.createDataFrame(rows)
    return frame


# bq_table.py
def model(dbt, session):
    """Build a one-row DataFrame through ``session.createDataFrame``.

    The model is configured with ``submission_method="serverless"`` before the
    frame is created from a single ``{"id": 1}`` record.
    """
    dbt.config(submission_method="serverless")
    rows = [{"id": 1}]
    frame = session.createDataFrame(rows)
    return frame


# bigframes.py
# No need for `import bigframes.pandas as bpd` here: it is already imported in the parent script.
def model(dbt, session):
    """Return a one-row ``bpd.DataFrame`` with a single ``id`` column.

    The model is configured with ``submission_method="bigframes"``. ``bpd`` is
    not imported in this snippet and must already be in scope. ``session`` is
    not used.
    """
    dbt.config(submission_method="bigframes")
    frame = bpd.DataFrame({"id": [1]})
    return frame


# bigframes_ml.py
def model(dbt, session):
    """Fit a 10-cluster KMeans model on the penguins table and return its score.

    Adapted from
    https://cloud.google.com/bigquery/docs/samples/bigquery-dataframes-clustering-model

    ``bpd`` is not imported in this snippet and must already be in scope.
    ``session`` is not used.
    """
    dbt.config(submission_method="bigframes")

    # Read the source table from BigQuery.
    penguins = bpd.read_gbq("bigquery-public-data.ml_datasets.penguins")

    # Build the clustering model and train it.
    from bigframes.ml.cluster import KMeans

    kmeans = KMeans(n_clusters=10)
    kmeans.fit(penguins["culmen_length_mm"], penguins["sex"])

    # Run a prediction over the same data; its output is not part of the
    # value returned by this model.
    predictions = kmeans.predict(penguins)  # noqa: F841

    # Score the fitted model and return that score as the model output.
    return kmeans.score(penguins)


# db_python.py
import pandas as pd
def model(dbt, session):
    """Return a one-row pandas DataFrame with a single ``id`` column.

    Before building the frame, the model is configured with
    ``submission_method="all_purpose_cluster"`` and a fixed ``cluster_id``.
    ``session`` is not used.
    """
    settings = {
        "submission_method": "all_purpose_cluster",
        "cluster_id": "1121-175813-2agrmn6x",
    }
    dbt.config(**settings)
    return pd.DataFrame({"id": [1]})