Skip to content

Instantly share code, notes, and snippets.

@yorek
Created April 25, 2025 17:36
Show Gist options
  • Save yorek/5532c1421a69f113e0fb50e7ada2ed37 to your computer and use it in GitHub Desktop.
Save yorek/5532c1421a69f113e0fb50e7ada2ed37 to your computer and use it in GitHub Desktop.
Generate CSV file from SQL Table
# pandas==2.2.3
# SQLAlchemy==2.0.40
# pyodbc==5.2.0
import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy.engine import URL
# Export one SQL Server table to a CSV file, reading it in fixed-size chunks
# so the whole result set is never materialised as a single DataFrame.

# Raw ODBC connection string; no user name or password is embedded in it
# (Trusted_Connection=yes).
connection_string = (
    r"DRIVER={ODBC Driver 18 for SQL Server};"
    r"SERVER=localhost,21433;"
    r"DATABASE=WikipediaTest;"
    r"TrustServerCertificate=Yes;"
    r"Trusted_Connection=yes;"
)

# The ODBC string is handed to SQLAlchemy verbatim via the "odbc_connect"
# query parameter of the mssql+pyodbc URL.
connection_url = URL.create("mssql+pyodbc", query={"odbc_connect": connection_string})

# Alternative source table, kept for quick switching:
# sql = 'SELECT * FROM [dbo].[wikipedia_articles_embeddings_nomic_search_queries];'
sql = 'SELECT * FROM [dbo].[wikipedia_articles_embeddings_nomic] ORDER BY id ASC;'
file_name = 'wikipedia_articles_embeddings_nomic.csv'

engine = create_engine(connection_url)

with engine.connect() as conn, conn.begin():
    # Issue the query before touching the output file, so a failing query is
    # less likely to leave a truncated/empty CSV behind.
    chunks = pd.read_sql_query(sql, conn, chunksize=1000)

    # Open the output once instead of re-opening it in append mode for every
    # chunk. newline="" disables newline translation so the explicit "\n"
    # line terminator is written as-is.
    with open(file_name, "w", encoding="utf-8", newline="") as out_file:
        for i, chunk in enumerate(chunks):
            # Only the first chunk carries the column header row.
            is_first_chunk = i == 0
            chunk.to_csv(
                out_file,
                lineterminator="\n",
                header=is_first_chunk,
                index=False,
            )
            print(f"Chunk {i} processed (Rows: {len(chunk)})")

print("All chunks processed and written to CSV.")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment