Created
July 9, 2020 14:48
-
-
Save sktse/f1b3bb7bceffe47637eb572a5e750f7e to your computer and use it in GitHub Desktop.
Code snippet that programmatically generates PySpark `StructField` definitions from RETS table metadata.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# RETS ``DataType`` -> PySpark type expression, for the data types whose
# mapping depends on nothing else. "Character" is handled separately because
# its Spark type also depends on the column's ``Interpretation``.
_SPARK_TYPE_BY_DATA_TYPE = {
    "Boolean": "BooleanType()",
    # NOTE(review): a bare DecimalType() falls back to PySpark's default
    # precision/scale, which may drop fractional digits -- confirm whether the
    # RETS metadata exposes precision information that should be used here.
    "Decimal": "DecimalType()",
    "Int": "IntegerType()",
    "DateTime": "TimestampType()",
    "Long": "LongType()",
    "Date": "DateType()",
}


def _spark_type_for(data_type, interpretation):
    """Return the PySpark type expression (as source text) for one column."""
    if data_type == "Character":
        # Only a multi-valued lookup becomes an array of strings; a single
        # lookup ("Lookup") and free text are both plain strings.
        if interpretation == "LookupMulti":
            return "ArrayType(StringType(), False)"
        return "StringType()"
    try:
        return _SPARK_TYPE_BY_DATA_TYPE[data_type]
    except KeyError:
        raise ValueError(f"Unknown type: {data_type}") from None


def generate_pyspark_structs(client, output_path="./tests/data/test.py"):
    """Write one PySpark ``StructField(...)`` source line per table column.

    The column metadata is read from
    ``client.get_resource("Property").get_class("Property").table``. Every
    entry of that table must support item lookup for the keys
    ``"SystemName"``, ``"DataType"`` and ``"Interpretation"``.

    Args:
        client: Object exposing ``get_resource(name)``; the returned resource
            must expose ``get_class(name)``, whose result has an iterable
            ``table`` attribute.
        output_path: File to write the generated lines to. It is opened in
            ``"w"`` mode, so an existing file is overwritten. Defaults to the
            path the function historically wrote to.

    Raises:
        ValueError: If a column's ``DataType`` has no known Spark mapping.
            Nothing is written in that case, because the file is only opened
            after every column has been converted.
    """
    property_class = client.get_resource("Property").get_class("Property")

    fields = []
    for col in property_class.table:
        name = col["SystemName"]
        spark_type = _spark_type_for(col["DataType"], col["Interpretation"])
        fields.append(f' StructField("{name}", {spark_type}, nullable=True),')

    # Explicit encoding keeps the generated source identical across platforms
    # instead of depending on the locale's default encoding.
    with open(output_path, "w", encoding="utf-8") as outfile:
        outfile.write("\n".join(fields))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment