Last active
February 5, 2025 19:59
-
-
Save aaditkamat/8adf14a713af140e6886d809f5804daf to your computer and use it in GitHub Desktop.
Create SQL DDL and DML statements based on CSV data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pathlib import Path

import pandas as pd
def _sql_column_type(dtype) -> str:
    """Return the SQL column type used for a pandas/numpy *dtype*.

    Any dtype that is not boolean, integer, float or datetime falls back
    to ``TEXT`` instead of raising.
    """
    types = pd.api.types
    # Bool is tested first so it can never be classified as a number.
    if types.is_bool_dtype(dtype):
        return 'BOOLEAN'
    if types.is_integer_dtype(dtype):
        return 'INT'
    if types.is_float_dtype(dtype):
        return 'FLOAT'
    if types.is_datetime64_any_dtype(dtype):
        return 'TIMESTAMP'
    return 'TEXT'


def create_sql_create_statements(df: pd.DataFrame, table_name: str) -> str:
    """Build a ``CREATE TABLE`` statement whose columns mirror *df*.

    Args:
        df: Frame whose column labels and dtypes define the table columns.
        table_name: Name placed after ``CREATE TABLE``. It is inserted
            verbatim (not quoted or escaped).

    Returns:
        The statement as a single string: one tab-indented column
        definition per line, terminated by ``);``.
    """
    # Iterating df.dtypes (rather than df[col].dtype) also works when
    # column labels are duplicated.
    column_definitions = [
        f'\t{column} {_sql_column_type(dtype)}'
        for column, dtype in df.dtypes.items()
    ]
    if not column_definitions:
        # Keep the parentheses balanced when the frame has no columns.
        return f'CREATE TABLE {table_name} (\n);'
    joined_definitions = ',\n'.join(column_definitions)
    return f'CREATE TABLE {table_name} (\n{joined_definitions}\n);'
def _sql_literal(value) -> str:
    """Render one cell value as a SQL literal.

    Missing values become ``NULL``, booleans ``TRUE``/``FALSE``, numbers
    are written bare, and everything else is written as a single-quoted
    string with embedded single quotes doubled.
    """
    types = pd.api.types
    # is_scalar guards pd.isna, which returns an array for list-likes.
    if value is None or (types.is_scalar(value) and pd.isna(value)):
        return 'NULL'
    if types.is_bool(value):
        return 'TRUE' if value else 'FALSE'
    if types.is_integer(value):
        # int()/float() drop any NumPy scalar wrapper from the output.
        return str(int(value))
    if types.is_float(value):
        return repr(float(value))
    escaped = str(value).replace("'", "''")
    return f"'{escaped}'"


def create_sql_insert_statements(df: pd.DataFrame, table_name: str) -> str:
    """Build one ``INSERT INTO`` statement per row of *df*.

    Args:
        df: Frame whose rows are turned into ``VALUES (...)`` lists, in
            column order.
        table_name: Name placed after ``INSERT INTO``. It is inserted
            verbatim (not quoted or escaped).

    Returns:
        The statements joined by newlines, each terminated by ``;``.
        An empty string when *df* has no rows.
    """
    # itertuples keeps each column's own type; a row-wise apply would
    # upcast ints to floats in frames mixing int and float columns.
    statements = []
    for row in df.itertuples(index=False, name=None):
        values = ', '.join(_sql_literal(cell) for cell in row)
        statements.append(f'INSERT INTO {table_name} VALUES ({values});')
    return '\n'.join(statements)
if __name__ == '__main__':
    # Prompt for a CSV file, then print the CREATE TABLE statement
    # followed by one INSERT statement per row.
    filename = input('Enter name of CSV file: ').strip()
    # Accept the name with or without its ".csv" extension.
    if filename.lower().endswith('.csv'):
        csv_path = Path(filename)
    else:
        csv_path = Path(f'{filename}.csv')
    # Use only the file stem as the table name so that a path such as
    # "data/users" yields the table "users" rather than "data/users".
    table_name = csv_path.stem
    df = pd.read_csv(csv_path)
    sql_create_statement = create_sql_create_statements(df, table_name)
    sql_insert_statements = create_sql_insert_statements(df, table_name)
    print(sql_create_statement)
    print(sql_insert_statements)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment