Created
June 1, 2023 13:40
-
-
Save vibhatha/e8408546dc3464023dd352bdf904769a to your computer and use it in GitHub Desktop.
Sample substrait plan
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
extension_uris { | |
extension_uri_anchor: 1 | |
uri: "https://github.com/substrait-io/substrait/blob/main/extensions/functions_boolean.yaml" | |
} | |
extension_uris { | |
extension_uri_anchor: 2 | |
uri: "https://github.com/substrait-io/substrait/blob/main/extensions/functions_comparison.yaml" | |
} | |
extensions { | |
extension_function { | |
extension_uri_reference: 1 | |
function_anchor: 1 | |
name: "and" | |
} | |
} | |
extensions { | |
extension_function { | |
extension_uri_reference: 2 | |
function_anchor: 2 | |
name: "equal" | |
} | |
} | |
extensions { | |
extension_function { | |
extension_uri_reference: 2 | |
function_anchor: 3 | |
name: "gt" | |
} | |
} | |
relations { | |
root { | |
input { | |
sort { | |
input { | |
filter { | |
input { | |
project { | |
common { | |
emit { | |
output_mapping: 12 | |
output_mapping: 13 | |
output_mapping: 14 | |
output_mapping: 15 | |
output_mapping: 16 | |
} | |
} | |
input { | |
join { | |
left { | |
project { | |
common { | |
emit { | |
output_mapping: 3 | |
output_mapping: 4 | |
output_mapping: 5 | |
} | |
} | |
input { | |
read { | |
common { | |
direct { | |
} | |
} | |
base_schema { | |
names: "tconst" | |
names: "averageRating" | |
names: "numVotes" | |
struct { | |
types { | |
string { | |
nullability: NULLABILITY_NULLABLE | |
} | |
} | |
types { | |
string { | |
nullability: NULLABILITY_NULLABLE | |
} | |
} | |
types { | |
string { | |
nullability: NULLABILITY_NULLABLE | |
} | |
} | |
nullability: NULLABILITY_REQUIRED | |
} | |
} | |
named_table { | |
names: "ratings" | |
} | |
} | |
} | |
expressions { | |
selection { | |
direct_reference { | |
struct_field { | |
} | |
} | |
root_reference { | |
} | |
} | |
} | |
expressions { | |
cast { | |
type { | |
fp64 { | |
nullability: NULLABILITY_NULLABLE | |
} | |
} | |
input { | |
selection { | |
direct_reference { | |
struct_field { | |
field: 1 | |
} | |
} | |
root_reference { | |
} | |
} | |
} | |
failure_behavior: FAILURE_BEHAVIOR_THROW_EXCEPTION | |
} | |
} | |
expressions { | |
cast { | |
type { | |
i64 { | |
nullability: NULLABILITY_NULLABLE | |
} | |
} | |
input { | |
selection { | |
direct_reference { | |
struct_field { | |
field: 2 | |
} | |
} | |
root_reference { | |
} | |
} | |
} | |
failure_behavior: FAILURE_BEHAVIOR_THROW_EXCEPTION | |
} | |
} | |
} | |
} | |
right { | |
project { | |
common { | |
emit { | |
output_mapping: 9 | |
output_mapping: 10 | |
output_mapping: 11 | |
} | |
} | |
input { | |
filter { | |
input { | |
read { | |
common { | |
direct { | |
} | |
} | |
base_schema { | |
names: "tconst" | |
names: "titleType" | |
names: "primaryTitle" | |
names: "originalTitle" | |
names: "isAdult" | |
names: "startYear" | |
names: "endYear" | |
names: "runtimeMinutes" | |
names: "genres" | |
struct { | |
types { | |
string { | |
nullability: NULLABILITY_NULLABLE | |
} | |
} | |
types { | |
string { | |
nullability: NULLABILITY_NULLABLE | |
} | |
} | |
types { | |
string { | |
nullability: NULLABILITY_NULLABLE | |
} | |
} | |
types { | |
string { | |
nullability: NULLABILITY_NULLABLE | |
} | |
} | |
types { | |
string { | |
nullability: NULLABILITY_NULLABLE | |
} | |
} | |
types { | |
string { | |
nullability: NULLABILITY_NULLABLE | |
} | |
} | |
types { | |
string { | |
nullability: NULLABILITY_NULLABLE | |
} | |
} | |
types { | |
string { | |
nullability: NULLABILITY_NULLABLE | |
} | |
} | |
types { | |
string { | |
nullability: NULLABILITY_NULLABLE | |
} | |
} | |
nullability: NULLABILITY_REQUIRED | |
} | |
} | |
named_table { | |
names: "basics" | |
} | |
} | |
} | |
condition { | |
scalar_function { | |
function_reference: 1 | |
output_type { | |
bool { | |
nullability: NULLABILITY_NULLABLE | |
} | |
} | |
arguments { | |
value { | |
scalar_function { | |
function_reference: 2 | |
output_type { | |
bool { | |
nullability: NULLABILITY_NULLABLE | |
} | |
} | |
arguments { | |
value { | |
selection { | |
direct_reference { | |
struct_field { | |
field: 1 | |
} | |
} | |
root_reference { | |
} | |
} | |
} | |
} | |
arguments { | |
value { | |
literal { | |
string: "movie" | |
} | |
} | |
} | |
} | |
} | |
} | |
arguments { | |
value { | |
scalar_function { | |
function_reference: 2 | |
output_type { | |
bool { | |
nullability: NULLABILITY_NULLABLE | |
} | |
} | |
arguments { | |
value { | |
selection { | |
direct_reference { | |
struct_field { | |
field: 4 | |
} | |
} | |
root_reference { | |
} | |
} | |
} | |
} | |
arguments { | |
value { | |
literal { | |
string: "0" | |
} | |
} | |
} | |
} | |
} | |
} | |
} | |
} | |
} | |
} | |
expressions { | |
selection { | |
direct_reference { | |
struct_field { | |
} | |
} | |
root_reference { | |
} | |
} | |
} | |
expressions { | |
selection { | |
direct_reference { | |
struct_field { | |
field: 2 | |
} | |
} | |
root_reference { | |
} | |
} | |
} | |
expressions { | |
selection { | |
direct_reference { | |
struct_field { | |
field: 5 | |
} | |
} | |
root_reference { | |
} | |
} | |
} | |
} | |
} | |
expression { | |
scalar_function { | |
function_reference: 2 | |
output_type { | |
bool { | |
nullability: NULLABILITY_NULLABLE | |
} | |
} | |
arguments { | |
value { | |
selection { | |
direct_reference { | |
struct_field { | |
} | |
} | |
root_reference { | |
} | |
} | |
} | |
} | |
arguments { | |
value { | |
selection { | |
direct_reference { | |
struct_field { | |
field: 3 | |
} | |
} | |
root_reference { | |
} | |
} | |
} | |
} | |
} | |
} | |
type: JOIN_TYPE_INNER | |
} | |
} | |
expressions { | |
selection { | |
direct_reference { | |
struct_field { | |
} | |
} | |
root_reference { | |
} | |
} | |
} | |
expressions { | |
selection { | |
direct_reference { | |
struct_field { | |
field: 1 | |
} | |
} | |
root_reference { | |
} | |
} | |
} | |
expressions { | |
selection { | |
direct_reference { | |
struct_field { | |
field: 2 | |
} | |
} | |
root_reference { | |
} | |
} | |
} | |
expressions { | |
selection { | |
direct_reference { | |
struct_field { | |
field: 4 | |
} | |
} | |
root_reference { | |
} | |
} | |
} | |
expressions { | |
selection { | |
direct_reference { | |
struct_field { | |
field: 5 | |
} | |
} | |
root_reference { | |
} | |
} | |
} | |
} | |
} | |
condition { | |
scalar_function { | |
function_reference: 3 | |
output_type { | |
bool { | |
nullability: NULLABILITY_NULLABLE | |
} | |
} | |
arguments { | |
value { | |
selection { | |
direct_reference { | |
struct_field { | |
field: 2 | |
} | |
} | |
root_reference { | |
} | |
} | |
} | |
} | |
arguments { | |
value { | |
literal { | |
i32: 200000 | |
} | |
} | |
} | |
} | |
} | |
} | |
} | |
sorts { | |
expr { | |
selection { | |
direct_reference { | |
struct_field { | |
field: 1 | |
} | |
} | |
root_reference { | |
} | |
} | |
} | |
direction: SORT_DIRECTION_DESC_NULLS_FIRST | |
} | |
sorts { | |
expr { | |
selection { | |
direct_reference { | |
struct_field { | |
field: 2 | |
} | |
} | |
root_reference { | |
} | |
} | |
} | |
direction: SORT_DIRECTION_DESC_NULLS_FIRST | |
} | |
} | |
} | |
names: "tconst" | |
names: "avg_rating" | |
names: "num_votes" | |
names: "primaryTitle" | |
names: "startYear" | |
} | |
} | |
version { | |
minor_number: 24 | |
producer: "ibis-substrait" | |
} |
I get a bit different plan, using the following code (tables downloaded from https://datasets.imdbws.com/)
import duckdb
from duckdb import DuckDBPyConnection
import pandas as pd
from converter import convert_tsv_to_csv
from google.protobuf import json_format
import ibis
# Setup
def convert_tsv_to_csv(src: str, dst: str) -> None:
"""
A simple python method to convert a tsc file into a csv file.
Args:
src (str): source path for the .tsv file
dst (str): desired destination path
"""
try:
table = pd.read_table(filepath_or_buffer=src, sep="\t")
table.to_csv(dst, index=False)
except Exception as e:
raise Exception(f"Failed to convert TSV to CSV: {str(e)}")
def prepare_tables():
convert_tsv_to_csv(src="../datasets/title.ratings.tsv", dst="../datasets/ratings.csv")
convert_tsv_to_csv(src="../datasets/title.basics.tsv", dst="../datasets/basics.csv")
def load_tables(connection: DuckDBPyConnection):
ratings_df = pd.read_csv("../datasets/ratings.csv")
connection.execute(
"CREATE TABLE ratings AS SELECT * FROM ratings_df"
)
basics_df = pd.read_csv(filepath_or_buffer="../datasets/basics.csv")
connection.execute(
"CREATE TABLE basics AS SELECT * FROM basics_df"
)
duckdb_conn = duckdb.connect(database="imdb.db",read_only=False)
load_tables(duckdb_conn)
import ibis
from ibis import _
from ibis_substrait.compiler.core import SubstraitCompiler
ratings = ibis.table(
[
("tconst", "str"),
("averageRating", "str"),
("numVotes", "str"),
],
name="ratings",
)
ratings = ratings.select(
tconst=ratings.tconst,
avg_rating = ratings.averageRating.cast('float'),
num_votes=ratings.numVotes.cast("int")
)
basics = ibis.table(
[
("tconst", "str"),
("titleType", "str"),
("primaryTitle", "str"),
("originalTitle", "str"),
("isAdult", "str"),
("startYear", "str"),
("endYear", "str"),
("runtimeMinutes", "str"),
("genres", "str"),
],
name="basics",
)
basics = basics.filter([basics.titleType == "movie", basics.isAdult == "0"]).select(
"tconst",
"primaryTitle",
"startYear",
)
topfilms = (
ratings.join(basics, "tconst")
.order_by([_.avg_rating.desc(), _.num_votes.desc()])
.filter(_.num_votes > 200_000)
)
compiler = SubstraitCompiler()
plan = compiler.compile(topfilms)
with open("topfilms.proto", "wb") as f:
f.write(plan.SerializeToString())
duckdb_conn.install_extension("substrait")
duckdb_conn.load_extension("substrait")
with open("topfilms.proto", "rb") as f:
plan_blob = f.read()
plan_result = duckdb_conn.from_substrait(plan_blob)
print(plan_result)
# Good results
The plan I get is a bit different (used some text comparison tool):
extension_uris {
extension_uri_anchor: 1
}
extensions {
extension_function {
extension_uri_reference: 1
function_anchor: 1
name: "and"
}
}
extensions {
extension_function {
extension_uri_reference: 1
function_anchor: 2
name: "equal"
}
}
extensions {
extension_function {
extension_uri_reference: 1
function_anchor: 3
name: "gt"
}
}
relations {
root {
input {
sort {
input {
filter {
input {
project {
common {
emit {
output_mapping: 12
output_mapping: 13
output_mapping: 14
output_mapping: 15
output_mapping: 16
}
}
input {
join {
left {
project {
common {
emit {
output_mapping: 3
output_mapping: 4
output_mapping: 5
}
}
input {
read {
common {
direct {
}
}
base_schema {
names: "tconst"
names: "averageRating"
names: "numVotes"
struct {
types {
string {
nullability: NULLABILITY_NULLABLE
}
}
types {
string {
nullability: NULLABILITY_NULLABLE
}
}
types {
string {
nullability: NULLABILITY_NULLABLE
}
}
nullability: NULLABILITY_REQUIRED
}
}
named_table {
names: "ratings"
}
}
}
expressions {
selection {
direct_reference {
struct_field {
}
}
root_reference {
}
}
}
expressions {
cast {
type {
fp64 {
nullability: NULLABILITY_NULLABLE
}
}
input {
selection {
direct_reference {
struct_field {
field: 1
}
}
root_reference {
}
}
}
}
}
expressions {
cast {
type {
i64 {
nullability: NULLABILITY_NULLABLE
}
}
input {
selection {
direct_reference {
struct_field {
field: 2
}
}
root_reference {
}
}
}
}
}
}
}
right {
project {
common {
emit {
output_mapping: 9
output_mapping: 10
output_mapping: 11
}
}
input {
filter {
input {
read {
common {
direct {
}
}
base_schema {
names: "tconst"
names: "titleType"
names: "primaryTitle"
names: "originalTitle"
names: "isAdult"
names: "startYear"
names: "endYear"
names: "runtimeMinutes"
names: "genres"
struct {
types {
string {
nullability: NULLABILITY_NULLABLE
}
}
types {
string {
nullability: NULLABILITY_NULLABLE
}
}
types {
string {
nullability: NULLABILITY_NULLABLE
}
}
types {
string {
nullability: NULLABILITY_NULLABLE
}
}
types {
string {
nullability: NULLABILITY_NULLABLE
}
}
types {
string {
nullability: NULLABILITY_NULLABLE
}
}
types {
string {
nullability: NULLABILITY_NULLABLE
}
}
types {
string {
nullability: NULLABILITY_NULLABLE
}
}
types {
string {
nullability: NULLABILITY_NULLABLE
}
}
nullability: NULLABILITY_REQUIRED
}
}
named_table {
names: "basics"
}
}
}
condition {
scalar_function {
function_reference: 1
output_type {
bool {
nullability: NULLABILITY_NULLABLE
}
}
arguments {
value {
scalar_function {
function_reference: 2
output_type {
bool {
nullability: NULLABILITY_NULLABLE
}
}
arguments {
value {
selection {
direct_reference {
struct_field {
field: 1
}
}
root_reference {
}
}
}
}
arguments {
value {
literal {
string: "movie"
}
}
}
}
}
}
arguments {
value {
scalar_function {
function_reference: 2
output_type {
bool {
nullability: NULLABILITY_NULLABLE
}
}
arguments {
value {
selection {
direct_reference {
struct_field {
field: 4
}
}
root_reference {
}
}
}
}
arguments {
value {
literal {
string: "0"
}
}
}
}
}
}
}
}
}
}
expressions {
selection {
direct_reference {
struct_field {
}
}
root_reference {
}
}
}
expressions {
selection {
direct_reference {
struct_field {
field: 2
}
}
root_reference {
}
}
}
expressions {
selection {
direct_reference {
struct_field {
field: 5
}
}
root_reference {
}
}
}
}
}
expression {
scalar_function {
function_reference: 2
output_type {
bool {
nullability: NULLABILITY_NULLABLE
}
}
arguments {
value {
selection {
direct_reference {
struct_field {
}
}
root_reference {
}
}
}
}
arguments {
value {
selection {
direct_reference {
struct_field {
field: 3
}
}
root_reference {
}
}
}
}
}
}
type: JOIN_TYPE_INNER
}
}
expressions {
selection {
direct_reference {
struct_field {
}
}
root_reference {
}
}
}
expressions {
selection {
direct_reference {
struct_field {
field: 1
}
}
root_reference {
}
}
}
expressions {
selection {
direct_reference {
struct_field {
field: 2
}
}
root_reference {
}
}
}
expressions {
selection {
direct_reference {
struct_field {
field: 4
}
}
root_reference {
}
}
}
expressions {
selection {
direct_reference {
struct_field {
field: 5
}
}
root_reference {
}
}
}
}
}
condition {
scalar_function {
function_reference: 3
output_type {
bool {
nullability: NULLABILITY_NULLABLE
}
}
arguments {
value {
selection {
direct_reference {
struct_field {
field: 2
}
}
root_reference {
}
}
}
}
arguments {
value {
literal {
i32: 200000
}
}
}
}
}
}
}
sorts {
expr {
selection {
direct_reference {
struct_field {
field: 1
}
}
root_reference {
}
}
}
direction: SORT_DIRECTION_DESC_NULLS_FIRST
}
sorts {
expr {
selection {
direct_reference {
struct_field {
field: 2
}
}
root_reference {
}
}
}
direction: SORT_DIRECTION_DESC_NULLS_FIRST
}
}
}
names: "tconst"
names: "avg_rating"
names: "num_votes"
names: "primaryTitle"
names: "startYear"
}
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Generated using the following code