Last active
October 4, 2016 01:53
-
-
Save gocreating/290742d3f0e423af43c9458b6062467b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import _mysql | |
import time | |
FILE_NAMES = [ | |
'C:\Users\go-creating\Desktop\data-mining-hw1\dataset\data.log-20160918', | |
'C:\Users\go-creating\Desktop\data-mining-hw1\dataset\data.log-20160919', | |
'C:\Users\go-creating\Desktop\data-mining-hw1\dataset\data.log-20160920', | |
'C:\Users\go-creating\Desktop\data-mining-hw1\dataset\data.log-20160921', | |
'C:\Users\go-creating\Desktop\data-mining-hw1\dataset\data.log-20160922', | |
'C:\Users\go-creating\Desktop\data-mining-hw1\dataset\data.log-20160923', | |
'C:\Users\go-creating\Desktop\data-mining-hw1\dataset\data.log-20160924', | |
'C:\Users\go-creating\Desktop\data-mining-hw1\dataset\data.log-20160925', | |
] | |
SKIP_LINE_COUNT = 26 | |
db=_mysql.connect( | |
host='localhost', | |
user='root', | |
passwd='root', | |
db='data_mining' | |
) | |
for filename in FILE_NAMES: | |
with open(filename) as f: | |
print '================' | |
print 'start parsing', filename | |
# line indicator | |
i = SKIP_LINE_COUNT | |
startTime = time.time() | |
for _ in range(SKIP_LINE_COUNT): | |
next(f) | |
for line in f: | |
# debugger | |
i = i + 1 | |
if i % 10000 == 0: | |
print i, 'entries parsed' | |
# remove \n character | |
line = line[:-1] | |
lineParts = line.split(' ') | |
# avoid empty line | |
if len(lineParts) == 2: | |
entries = lineParts[1].split('|')[1:] | |
fields = ['`topic_group`'] | |
values = ['\'' + lineParts[0].split('/')[1] + '\''] | |
for entry in entries: | |
parts = entry.replace('\'', '').split('=') | |
# avoid empty entry | |
if len(parts) == 2: | |
field = '`' + parts[0] + '`' | |
# avoid `NaN` sensor value | |
value = ('\'' + parts[1] + '\'').replace("'NaN'", 'NULL') | |
# avoid duplicate fields | |
if field not in fields: | |
fields.append(field) | |
values.append(value) | |
queryString = ( | |
""" | |
INSERT INTO raw (%s) VALUES (%s) | |
""" % (','.join(fields), ','.join(values)) | |
) | |
try: | |
db.query(queryString) | |
except Exception as err: | |
# error reporting | |
print '' | |
print '==================== %d line ====================' % i | |
print line | |
print '' | |
print fields | |
print '' | |
print values | |
print '' | |
print err | |
elapsedTime = time.time() - startTime | |
print i - SKIP_LINE_COUNT, 'lines parsed in ', elapsedTime, 'seconds' |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
DROP TABLE `data_mining`.`raw`; | |
CREATE TABLE raw ( | |
`topic_group` VARCHAR(255) DEFAULT NULL, | |
`ver_format` VARCHAR(255) DEFAULT NULL, | |
`fmt_opt` VARCHAR(255) DEFAULT NULL, | |
`app` VARCHAR(255) DEFAULT NULL, | |
`ver_app` VARCHAR(255) DEFAULT NULL, | |
`device_id` VARCHAR(255) DEFAULT NULL, | |
`tick` BIGINT DEFAULT NULL, | |
`date` DATE DEFAULT NULL, | |
`time` TIME DEFAULT NULL, | |
`device` VARCHAR(255) DEFAULT NULL, | |
`s_0` FLOAT DEFAULT NULL, | |
`s_1` FLOAT DEFAULT NULL, | |
`s_2` FLOAT DEFAULT NULL, | |
`s_3` FLOAT DEFAULT NULL, | |
`s_4` FLOAT DEFAULT NULL, | |
`s_b0` FLOAT DEFAULT NULL, | |
`s_b1` FLOAT DEFAULT NULL, | |
`s_d0` FLOAT DEFAULT NULL, | |
`s_d1` VARCHAR(255) DEFAULT NULL, | |
`s_d2` FLOAT DEFAULT NULL, | |
`s_d3` FLOAT DEFAULT NULL, | |
`s_g0` FLOAT DEFAULT NULL, | |
`s_g1` FLOAT DEFAULT NULL, | |
`s_g2` FLOAT DEFAULT NULL, | |
`s_g3` FLOAT DEFAULT NULL, | |
`s_g4` FLOAT DEFAULT NULL, | |
`s_g5` FLOAT DEFAULT NULL, | |
`s_g6` FLOAT DEFAULT NULL, | |
`s_g7` FLOAT DEFAULT NULL, | |
`s_g8` FLOAT DEFAULT NULL, | |
`s_h0` FLOAT DEFAULT NULL, | |
`h` FLOAT DEFAULT NULL, | |
`s_h1` FLOAT DEFAULT NULL, | |
`s_h2` FLOAT DEFAULT NULL, | |
`s_h3` FLOAT DEFAULT NULL, | |
`s_l0` FLOAT DEFAULT NULL, | |
`s_l1` FLOAT DEFAULT NULL, | |
`s_l2` FLOAT DEFAULT NULL, | |
`s_l3` FLOAT DEFAULT NULL, | |
`s_o` FLOAT DEFAULT NULL, | |
`s_t0` FLOAT DEFAULT NULL, | |
`s_t1` FLOAT DEFAULT NULL, | |
`s_t2` FLOAT DEFAULT NULL, | |
`s_t3` FLOAT DEFAULT NULL, | |
`s_w0` FLOAT DEFAULT NULL, | |
`s_w1` FLOAT DEFAULT NULL, | |
`s_r10` FLOAT DEFAULT NULL, | |
`s_r60` FLOAT DEFAULT NULL, | |
`s_s` FLOAT DEFAULT NULL, | |
`s_n0` FLOAT DEFAULT NULL, | |
`s_n1` FLOAT DEFAULT NULL, | |
`PM10` FLOAT DEFAULT NULL, | |
`PM2_5` FLOAT DEFAULT NULL, | |
`Temperature` FLOAT DEFAULT NULL, | |
`Humidity` FLOAT DEFAULT NULL, | |
`gps_lat` FLOAT DEFAULT NULL, | |
`gps_lon` FLOAT DEFAULT NULL, | |
`gps_fix` INT DEFAULT NULL, | |
`gps_num` INT DEFAULT NULL, | |
`gps_alt` VARCHAR(255) DEFAULT NULL, | |
`FAKE_GPS` VARCHAR(255) DEFAULT NULL, | |
`SiteName` VARCHAR(255) DEFAULT NULL, | |
`SiteID` VARCHAR(255) DEFAULT NULL | |
); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment