Last active
September 6, 2022 12:50
-
-
Save Tinram/18bcbbaeaee648ccde1b8ddfeb1d6f63 to your computer and use it in GitHub Desktop.
Seeking a faster MySQL LOAD DATA INFILE
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# MySQL 5.7.33 | |
# 1 million rows, 66MB CSV file | |
# 2022-09-06 | |
## standard way into an InnoDB table; slow | |
# LOAD DATA LOCAL INFILE 'products.csv' INTO TABLE import FIELDS TERMINATED BY ',' ENCLOSED BY '"' LINES TERMINATED BY '\n' IGNORE 1 LINES (col1, col2, col3, col4) | |
-- Query OK, 1000000 rows affected (1 min 10.47 sec) | |
## mysqld parameters to adjust | |
# innodb_buffer_pool_size=2G | |
# innodb_log_file_size=512M | |
# innodb_flush_log_at_trx_commit=2 | |
# innodb_flush_method=O_DIRECT | |
# innodb_change_buffer_max_size=50 | |
## disable key checks | |
SET unique_checks=0; | |
SET foreign_key_checks=0; | |
## create temporary memory table | |
## Loading data into a memory table is orders of magnitude faster than a standard table. | |
## Using a temporary table rather than a pure memory table avoids the need to adjust the max_heap_table_size parameter for large datasets. | |
CREATE TEMPORARY TABLE t_import ENGINE=MyISAM DEFAULT CHARSET=utf8mb4 SELECT * FROM t_product LIMIT 0; | |
-- Query OK, 0 rows affected (0.02 sec) | |
## establish primary key, which is not copied from base table | |
ALTER TABLE t_import MODIFY COLUMN id INT UNSIGNED PRIMARY KEY AUTO_INCREMENT; | |
-- Query OK, 0 rows affected (0.00 sec) | |
## load CSV data file, ignoring first header line | |
LOAD DATA LOCAL INFILE 'products.csv' INTO TABLE t_import FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n' IGNORE 1 LINES (col1, col2, col3, col4); | |
-- Query OK, 1000000 rows affected (3.32 sec) | |
## convert MyISAM table to InnoDB | |
ALTER TABLE t_import ENGINE=InnoDB; | |
-- Query OK, 1000000 rows affected (4.97 sec) | |
## add indexes to columns as required | |
# ALTER TABLE t_import ADD INDEX ... | |
## create holding table | |
CREATE table t_product_new LIKE t_import; | |
-- Query OK, 0 rows affected (0.56 sec) | |
## transfer memory table data to holding table | |
INSERT INTO t_product_new SELECT * FROM t_import; | |
-- Query OK, 1000000 rows affected (10.71 sec) | |
## rename holding table | |
RENAME TABLE t_product TO t_product_old; | |
# Query OK, 0 rows affected (0.06 sec) | |
RENAME TABLE t_product_new TO t_product; | |
-- Query OK, 0 rows affected (0.06 sec) | |
## verify table population | |
SELECT COUNT(*) FROM t_product; | |
SELECT * FROM t_product LIMIT 20; | |
## free | |
DROP TABLE t_import; | |
DROP TABLE t_product_old; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment