Last active
January 27, 2025 11:53
-
-
Save thangarajan8/16c6ec4bb150dbdc5346a4d88e373d92 to your computer and use it in GitHub Desktop.
to be deleted
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
from sqlalchemy import create_engine | |
from sqlalchemy.engine import URL | |
from concurrent.futures import ThreadPoolExecutor | |
# Sample DataFrame: two integer columns of 100 rows each
# (col1 holds 0..99, col2 holds 100..199).
data = dict(col1=range(100), col2=range(100, 200))
df = pd.DataFrame(data)
# MSSQL connection setup.
# Every "your_*" value below is a placeholder and must be replaced with real
# connection details before the script is run.
connection_string = URL.create(
    drivername="mssql+pyodbc",
    host="your_host",
    port=1433,
    database="your_database",
    username="your_username",
    password="your_password",
    query={"driver": "ODBC Driver 17 for SQL Server"},
)
# Shared engine used by the insert helpers further down in this script.
engine = create_engine(connection_string)
# Number of rows per chunk
chunk_size = 20
# Cut the DataFrame into consecutive row slices of at most `chunk_size` rows;
# the final slice is shorter when len(df) is not a multiple of chunk_size.
chunks = [
    df[start:start + chunk_size]
    for start in range(0, len(df), chunk_size)
]
# Function to insert a chunk into the database
def insert_chunk(chunk, table_name, if_exists_option, con=None):
    """Write one DataFrame chunk to a SQL table.

    Args:
        chunk: DataFrame (or row slice of one) whose rows are written. The
            DataFrame index is not stored (``index=False``).
        table_name: Name of the destination table.
        if_exists_option: Forwarded to ``DataFrame.to_sql`` as ``if_exists``
            (e.g. ``'replace'`` or ``'append'``).
        con: Optional connection/engine to write through. When omitted, the
            module-level ``engine`` is used, which matches the previous
            behaviour of this function.
    """
    # Resolve the default at call time so the global is only touched when the
    # caller did not supply a connection of their own.
    target = engine if con is None else con
    chunk.to_sql(
        name=table_name,
        con=target,
        if_exists=if_exists_option,
        index=False,
    )
# Multithreading insertion
def insert_chunks_multithreaded(chunks, table_name):
    """Insert DataFrame chunks into ``table_name``, appending in parallel.

    The first chunk is written with ``if_exists='replace'`` so the table is
    (re)created from scratch; every remaining chunk is written with
    ``if_exists='append'`` from a thread pool.

    Args:
        chunks: Iterable of DataFrame chunks. An empty iterable is a no-op.
        table_name: Name of the destination table.

    Raises:
        Whatever ``insert_chunk`` raises; worker exceptions are re-raised
        here through ``future.result()``.
    """
    pending = list(chunks)
    if not pending:
        return

    first_chunk, *remaining = pending

    # The 'replace' insert must finish before any 'append' starts. If it ran
    # in the pool next to the appends, an append could execute before the
    # table is dropped/re-created (its rows are then lost) or race the
    # replace while the table is being created.
    insert_chunk(first_chunk, table_name, 'replace')

    with ThreadPoolExecutor() as executor:
        futures = [
            executor.submit(insert_chunk, chunk, table_name, 'append')
            for chunk in remaining
        ]
        # Ensure all threads complete and surface any worker exception
        for future in futures:
            future.result()
# Run the insertion for the prepared chunks.
# "your_table_name" is a placeholder for the real destination table.
insert_chunks_multithreaded(chunks=chunks, table_name="your_table_name")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment