Created
September 8, 2022 17:48
-
-
Save Tinram/39eeec43e68f1d33c82ee4afe8e30267 to your computer and use it in GitHub Desktop.
faster MySQL LOAD DATA
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# -*- coding: utf-8 -*- | |
""" | |
Bulk Loader | |
Bulk load into a new InnoDB table via a temporary MyISAM memory table. | |
~2x faster than LOAD DATA direct into InnoDB table. | |
From https://gist.github.com/Tinram/18bcbbaeaee648ccde1b8ddfeb1d6f63 | |
Note: MEMORY table 2x faster, but requires max_heap_table_size pre-calculation; memory shunting/algos = poor performance. | |
Author: Martin Latter | |
Date: 07/09/2022 | |
Version: 0.02 | |
Py req: MySQLdb | |
pip3 install mysqlclient | |
""" | |
import contextlib | |
import sys | |
import time | |
import MySQLdb | |
import MySQLdb.cursors | |
############################################################# | |
# CONFIGURATION | |
############################################################# | |
DATAFILE = 'dump.csv' | |
TMP_TABLE_NAME = 'mem_import' | |
FINAL_TABLE_NAME = 'users' | |
CREDENTIALS_DB_LOCAL = dict( | |
user = 'root', | |
passwd = '', | |
host = 'localhost', | |
db = 'bulkload', | |
port = 3306, | |
cursorclass=MySQLdb.cursors.DictCursor | |
) | |
TEMP_TABLE = """ | |
CREATE TEMPORARY TABLE IF NOT EXISTS `%s`( | |
`id` INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY, | |
`firstname` VARCHAR(20) NOT NULL, | |
`lastname` VARCHAR(20) NOT NULL, | |
`country` VARCHAR(20) NOT NULL DEFAULT '', | |
`country_code` CHAR(2) NOT NULL, | |
KEY `idx_fl` (`firstname`, `lastname`) | |
) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4; | |
""" % (TMP_TABLE_NAME) | |
""" | |
server settings to consider using: | |
log_bin = OFF | |
innodb_doublewrite = OFF | |
innodb_flush_method = O_DIRECT | |
innodb_change_buffer_max_size = 50 | |
ALTER INSTANCE disable innodb redo_log; # 8.0.21+ | |
""" | |
############################################################# | |
UK = """ SET SESSION unique_checks = OFF """ | |
FK = """ SET SESSION foreign_key_checks = OFF """ | |
FL = """ SET GLOBAL innodb_flush_log_at_trx_commit = 2 """ | |
IF = """ SET GLOBAL local_infile = ON """ | |
IFE = """ SET GLOBAL local_infile = OFF """ # security clean-up | |
IMPORT = """ | |
LOAD DATA LOCAL INFILE '%s' INTO TABLE `%s` FIELDS TERMINATED BY ',' IGNORE 1 LINES (firstname, lastname, country, country_code) | |
""" % (DATAFILE, TMP_TABLE_NAME) | |
CONV = """ | |
ALTER TABLE `%s` ENGINE=InnoDB | |
""" % (TMP_TABLE_NAME) | |
CT = """ | |
CREATE TABLE `%s` LIKE `%s` | |
""" % (FINAL_TABLE_NAME, TMP_TABLE_NAME) | |
INS = """ | |
INSERT INTO `%s` SELECT * FROM `%s`; | |
""" % (FINAL_TABLE_NAME, TMP_TABLE_NAME) | |
DEL = """ | |
DROP TABLE `%s` | |
""" % (TMP_TABLE_NAME) | |
################################################################################# | |
def main(): | |
""" Connect and run queries. """ | |
try: | |
conn = MySQLdb.connect(**CREDENTIALS_DB_LOCAL) | |
except MySQLdb.Error as err: | |
print('Cannot connect to database - %d: %s' % (err.args[0], err.args[1])) | |
sys.exit(1) | |
with contextlib.closing(conn): | |
with conn.cursor() as cursor: | |
try: | |
cursor.execute(IF) | |
except MySQLdb.Error as err: | |
print(err) | |
try: | |
cursor.execute(UK) | |
except MySQLdb.Error as err: | |
print(err) | |
try: | |
cursor.execute(FK) | |
except MySQLdb.Error as err: | |
print(err) | |
try: | |
cursor.execute(IF) | |
except MySQLdb.Error as err: | |
print(err) | |
try: | |
cursor.execute(TEMP_TABLE) | |
except MySQLdb.Error as err: | |
print(err) | |
try: | |
start = time.time() | |
imp = cursor.execute(IMPORT) | |
if imp > 0: | |
print(str(imp) + ' rows loaded') | |
finish = time.time() - start | |
print('LOAD DATA: ' + str(round(finish, 3)) + 's') | |
except MySQLdb.Error as err: | |
print(err) | |
try: | |
cursor.execute(CONV) | |
except MySQLdb.Error as err: | |
print(err) | |
try: | |
cursor.execute(CT) | |
except MySQLdb.Error as err: | |
print(err) | |
try: | |
cursor.execute(INS) | |
except MySQLdb.Error as err: | |
print(err) | |
try: | |
cursor.execute(DEL) | |
except MySQLdb.Error as err: | |
print(err) | |
try: | |
cursor.execute(IFE) | |
except MySQLdb.Error as err: | |
print(err) | |
if __name__ == '__main__': | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment