Skip to content

Instantly share code, notes, and snippets.

@thangarajan8
Last active January 27, 2025 11:53
Show Gist options
  • Save thangarajan8/16c6ec4bb150dbdc5346a4d88e373d92 to your computer and use it in GitHub Desktop.
Save thangarajan8/16c6ec4bb150dbdc5346a4d88e373d92 to your computer and use it in GitHub Desktop.
to be deleted
import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy.engine import URL
from concurrent.futures import ThreadPoolExecutor
# Example frame: two columns of 100 rows each (0..99 and 100..199)
data = dict(col1=range(100), col2=range(100, 200))
df = pd.DataFrame(data)
# SQL Server connection through the pyodbc dialect.
# All credential/host values below are placeholders to be filled in.
connection_string = URL.create(
    drivername="mssql+pyodbc",
    host="your_host",
    port=1433,
    database="your_database",
    username="your_username",
    password="your_password",
    query={"driver": "ODBC Driver 17 for SQL Server"},
)
# Engine shared by every insert performed in this script
engine = create_engine(connection_string)
# Maximum number of rows per chunk
chunk_size = 20
# Cut the frame into consecutive positional row slices of at most
# chunk_size rows each (the last slice may be shorter)
chunks = [
    df.iloc[offset:offset + chunk_size]
    for offset in range(0, len(df), chunk_size)
]
# Write a single chunk to the database
def insert_chunk(chunk, table_name, if_exists_option):
    """Write ``chunk`` to ``table_name`` using the module-level ``engine``.

    ``if_exists_option`` is forwarded to ``to_sql`` as its ``if_exists``
    argument. The chunk's index is not written (``index=False``).
    """
    to_sql_kwargs = {
        "name": table_name,
        "con": engine,
        "if_exists": if_exists_option,
        "index": False,
    }
    chunk.to_sql(**to_sql_kwargs)
# Multithreading insertion
def insert_chunks_multithreaded(chunks, table_name):
    """Load ``chunks`` into ``table_name``, appending in parallel threads.

    The first chunk is written synchronously with ``if_exists='replace'``
    so the table is replaced exactly once, before any other write starts.
    Only then are the remaining chunks appended concurrently. Submitting
    the 'replace' write to the pool together with the 'append' writes
    would let appends run before or during the replace, which can lose
    rows or fail on table creation.

    Args:
        chunks: Iterable of DataFrame chunks to insert. If it is empty,
            nothing is written.
        table_name: Name of the destination table.

    Raises:
        Any exception raised by ``insert_chunk``. For the parallel appends
        the first failing future's exception is re-raised.

    Note:
        Appends run concurrently, so the order in which chunks reach the
        table is not guaranteed.
    """
    pending = list(chunks)
    if not pending:
        return

    first_chunk, *remaining_chunks = pending

    # Replace the table once, up front, so no append can race with it.
    insert_chunk(first_chunk, table_name, 'replace')

    with ThreadPoolExecutor() as executor:
        futures = [
            executor.submit(insert_chunk, chunk, table_name, 'append')
            for chunk in remaining_chunks
        ]
        # Ensure all threads complete and surface any worker exception
        for future in futures:
            future.result()
# Run the threaded load of the prepared chunks into the target table
insert_chunks_multithreaded(chunks=chunks, table_name="your_table_name")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment