Skip to content

Instantly share code, notes, and snippets.

@mzhang77
Last active January 6, 2025 19:43
Show Gist options
  • Save mzhang77/1301ebd5a24364f99daeb55d6cbf3d59 to your computer and use it in GitHub Desktop.
Save mzhang77/1301ebd5a24364f99daeb55d6cbf3d59 to your computer and use it in GitHub Desktop.
import mysql.connector
import re
# Slow-log files to import; they are processed in the order listed here.
slow_log_files = ["file1", "file2"]
# Destination table for the parsed records; it is dropped and recreated below.
table = "slow_log"

# Single session shared by every statement the script issues.
# NOTE(review): port 4000 with an empty root password looks like a local
# TiDB default setup - confirm before pointing this at another server.
connection = mysql.connector.connect(
    host="127.0.0.1",
    port="4000",
    user="root",
    password="",
    database="test",
)
if connection.is_connected():
    # With autocommit on, no explicit commit() is needed after the inserts.
    connection.autocommit = True
    print("Connected to MySQL")

# Module-level cursor reused for the DDL here and by the row inserts later.
cursor = connection.cursor()
# Start from a clean slate: discard any table left behind by a previous run.
cursor.execute(f"drop table if exists {table}")
# Create the destination table that receives one row per slow-log record.
# Only Time is declared NOT NULL; every other column defaults to NULL, so a
# record may omit any of them.
# NOTE(review): the column set presumably mirrors TiDB's
# INFORMATION_SCHEMA.SLOW_QUERY - confirm against the server version in use.
cursor.execute(f'''
CREATE TABLE {table} (
INSTANCE varchar(64) DEFAULT NULL,
Time timestamp(6) NOT NULL,
Txn_start_ts bigint(20) unsigned DEFAULT NULL,
User varchar(100) DEFAULT NULL,
Host varchar(64) DEFAULT NULL,
Conn_ID bigint(20) unsigned DEFAULT NULL,
Session_alias varchar(64) DEFAULT NULL,
Exec_retry_count bigint(20) unsigned DEFAULT NULL,
Exec_retry_time double DEFAULT NULL,
Query_time double DEFAULT NULL,
Parse_time double DEFAULT NULL,
Compile_time double DEFAULT NULL,
Rewrite_time double DEFAULT NULL,
Preproc_subqueries bigint(20) unsigned DEFAULT NULL,
Preproc_subqueries_time double DEFAULT NULL,
Optimize_time double DEFAULT NULL,
Wait_TS double DEFAULT NULL,
Prewrite_time double DEFAULT NULL,
Get_latest_ts_time double DEFAULT NULL,
Wait_prewrite_binlog_time double DEFAULT NULL,
Commit_time double DEFAULT NULL,
Get_commit_ts_time double DEFAULT NULL,
Commit_backoff_time double DEFAULT NULL,
Backoff_types varchar(256) DEFAULT NULL,
Slowest_prewrite_rpc_detail varchar(2000) DEFAULT NULL,
Resolve_lock_time double DEFAULT NULL,
Local_latch_wait_time double DEFAULT NULL,
Write_keys bigint(22) DEFAULT NULL,
Write_size bigint(22) DEFAULT NULL,
Prewrite_region bigint(22) DEFAULT NULL,
Txn_retry bigint(22) DEFAULT NULL,
Cop_time double DEFAULT NULL,
Process_time double DEFAULT NULL,
Wait_time double DEFAULT NULL,
Backoff_time double DEFAULT NULL,
LockKeys_time double DEFAULT NULL,
Request_count bigint(20) unsigned DEFAULT NULL,
Total_keys bigint(20) unsigned DEFAULT NULL,
Get_snapshot_time double DEFAULT NULL,
Process_keys bigint(20) unsigned DEFAULT NULL,
Rocksdb_delete_skipped_count bigint(20) unsigned DEFAULT NULL,
Rocksdb_key_skipped_count bigint(20) unsigned DEFAULT NULL,
Rocksdb_block_cache_hit_count bigint(20) unsigned DEFAULT NULL,
Rocksdb_block_read_count bigint(20) unsigned DEFAULT NULL,
Rocksdb_block_read_byte bigint(20) unsigned DEFAULT NULL,
Rocksdb_block_read_time double DEFAULT NULL,
DB varchar(64) DEFAULT NULL,
Index_names varchar(100) DEFAULT NULL,
Is_internal tinyint(1) DEFAULT NULL,
Digest varchar(64) DEFAULT NULL,
Stats varchar(512) DEFAULT NULL,
Num_cop_tasks bigint(20) DEFAULT NULL,
Cop_proc_avg double DEFAULT NULL,
Cop_proc_p90 double DEFAULT NULL,
Cop_proc_max double DEFAULT NULL,
Cop_proc_addr varchar(64) DEFAULT NULL,
Cop_wait_avg double DEFAULT NULL,
Cop_wait_p90 double DEFAULT NULL,
Cop_wait_max double DEFAULT NULL,
Cop_wait_addr varchar(64) DEFAULT NULL,
Cop_backoff_regionMiss_total_times bigint(20) DEFAULT NULL,
Cop_backoff_regionMiss_total_time double DEFAULT NULL,
Cop_backoff_regionMiss_max_time double DEFAULT NULL,
Cop_backoff_regionMiss_avg_time double DEFAULT NULL,
Cop_backoff_regionMiss_p90_time double DEFAULT NULL,
Mem_max bigint(20) DEFAULT NULL,
Disk_max bigint(20) DEFAULT NULL,
KV_total double DEFAULT NULL,
PD_total double DEFAULT NULL,
Backoff_total double DEFAULT NULL,
Write_sql_response_total double DEFAULT NULL,
Result_rows bigint(22) DEFAULT NULL,
Warnings longtext DEFAULT NULL,
Backoff_Detail varchar(4096) DEFAULT NULL,
Prepared tinyint(1) DEFAULT NULL,
Succ tinyint(1) DEFAULT NULL,
IsExplicitTxn tinyint(1) DEFAULT NULL,
IsWriteCacheTable tinyint(1) DEFAULT NULL,
IsSyncStatsFailed tinyint(1) DEFAULT NULL,
Plan_from_cache tinyint(1) DEFAULT NULL,
Plan_from_binding tinyint(1) DEFAULT NULL,
Has_more_results tinyint(1) DEFAULT NULL,
Resource_group varchar(64) DEFAULT NULL,
Request_unit_read double DEFAULT NULL,
Request_unit_write double DEFAULT NULL,
Time_queued_by_rc double DEFAULT NULL,
Plan longtext DEFAULT NULL,
Plan_digest varchar(128) DEFAULT NULL,
Binary_plan longtext DEFAULT NULL,
Prev_stmt longtext DEFAULT NULL,
Query longtext DEFAULT NULL
)
'''
)
def insert_row(row):
    """Insert one parsed slow-log record into the destination table.

    Args:
        row: Mapping of column name to value (str, int or float). Its keys
            become the column list of the INSERT statement.

    Values are handed to the driver through ``%s`` placeholders instead of
    being spliced into the SQL text, so quotes, backslashes and other
    special characters inside a value can neither break nor alter the
    statement.
    """
    # Backtick-quote identifiers so a column name that collides with an SQL
    # keyword is still accepted.
    columns = ", ".join(f"`{name}`" for name in row)
    placeholders = ", ".join(["%s"] * len(row))
    sql = f"INSERT INTO {table} ({columns}) VALUES ({placeholders})"
    cursor.execute(sql, tuple(row.values()))
# Header fields whose value is a single integer.
_INT_FIELDS = frozenset((
    'Txn_start_ts', 'Conn_ID', 'Num_cop_tasks', 'Mem_max', 'Result_rows',
))
# Header fields whose value is a single floating-point number.
_FLOAT_FIELDS = frozenset((
    'Query_time', 'Parse_time', 'Compile_time', 'Rewrite_time',
    'Optimize_time', 'Wait_TS', 'KV_total', 'PD_total', 'Backoff_total',
    'Write_sql_response_total', 'Request_unit_read', 'Request_unit_write',
    'Time_queued_by_rc',
))
# Free-text header fields, stored verbatim apart from quote normalisation.
# 'Prewrite_time' is deliberately absent: that line has its own branch below.
_TEXT_FIELDS = frozenset((
    'DB', 'Digest', 'Stats', 'Warnings', 'Plan', 'Plan_digest',
    'Binary_plan', 'Resource_group', 'Index_names', 'Prev_stmt',
))
# First key of header lines that pack several "key: number" pairs together.
_MULTI_VALUE_LINES = frozenset((
    'Cop_time', 'Cop_proc_avg', 'Cop_wait_avg',
    'Cop_backoff_regionMiss_total_times', 'Exec_retry_time',
))
# Header fields holding the literal text 'true' or 'false'.
_BOOL_FIELDS = frozenset((
    'Is_internal', 'Prepared', 'Plan_from_cache', 'Plan_from_binding',
    'Has_more_results', 'Succ', 'IsExplicitTxn', 'IsSyncStatsFailed',
))
# Compiled once; both patterns are applied to many lines.
_NUMERIC_PAIR = re.compile(r'(\w+): ([\d\.]+)')
_PREWRITE_BACKOFF = re.compile(r'(Prewrite_Backoff_types: \[.*?\])')


def _to_number(text):
    """Return *text* as a float (if it contains '.') or an int.

    A token that is made of digits and dots but is not a valid number (for
    example an IP address) is returned unchanged as a string instead of
    aborting the whole import with ValueError.
    """
    try:
        return float(text) if '.' in text else int(text)
    except ValueError:
        return text


def _store_numeric_pairs(row, text):
    """Copy every "key: number" pair found in *text* into *row*."""
    for key, value in _NUMERIC_PAIR.findall(text):
        row[key] = _to_number(value)


def _flush_record(row, query_lines):
    """Attach the collected statement text to *row* and insert it.

    Does nothing when *row* is empty, i.e. before the first record starts.
    """
    if row:
        # Single quotes are turned into double quotes, as for the other
        # text fields, so the stored text carries no single quotes.
        row['Query'] = ''.join(query_lines).strip().replace("'", '"')
        insert_row(row)


# Parse each slow-log file. A record starts at a "# Time: ..." header line,
# continues with further "# key: value" header lines and ends with the
# statement text (all lines that do not start with '#').
for filename in slow_log_files:
    row = {}
    query_lines = []
    with open(filename, 'r') as file:
        for line in file:
            if not line.startswith('#'):
                # Statement text: never run it through the header parser.
                query_lines.append(line)
                continue

            # Drop the leading "# " and split at the FIRST ": " only, so a
            # value that itself contains ": " is kept whole.
            body = line[2:].strip()
            key, sep, value = body.partition(': ')
            if not sep:
                # A '#' line without "key: value" shape carries no data.
                continue

            if key == 'Time':
                # A new record begins: emit the previous one first.
                _flush_record(row, query_lines)
                row = {key: value}
                query_lines = []
            elif key in _INT_FIELDS:
                row[key] = int(value)
            elif key in _FLOAT_FIELDS:
                row[key] = float(value)
            elif key in _TEXT_FIELDS:
                row[key] = value.replace("'", '"')
            elif key == 'User@Host':
                # partition() tolerates a value without '@' (Host becomes '').
                user, _, host = value.partition('@')
                row['User'] = user.strip()
                row['Host'] = host.strip()
            elif key in _MULTI_VALUE_LINES:
                _store_numeric_pairs(row, body)
            elif key == 'Prewrite_time':
                # Cut the bracketed backoff list out first so the numeric
                # scan below only sees "key: number" pairs.
                backoff = _PREWRITE_BACKOFF.search(body)
                if backoff:
                    found = backoff.group(1)
                    body = body.replace(found, '')
                    row['Backoff_types'] = found.strip().replace(
                        'Prewrite_Backoff_types: ', '')
                # Everything after the marker is free text; the marker is
                # optional, so only store the detail when it was present.
                numbers, has_detail, detail = body.partition(
                    'Slowest_prewrite_rpc_detail: ')
                _store_numeric_pairs(row, numbers)
                if has_detail:
                    row['Slowest_prewrite_rpc_detail'] = detail.strip()
            elif key in _BOOL_FIELDS:
                row[key] = 0 if value == 'false' else 1
            # Any other '#' header is not imported.

    # The last record of a file has no following "# Time:" line to trigger
    # its insert, so emit it explicitly at end of file.
    _flush_record(row, query_lines)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment