Skip to content

Instantly share code, notes, and snippets.

@sqybi
Created February 14, 2020 13:50
Show Gist options
  • Save sqybi/1458071ee2757f59315cda21337b8927 to your computer and use it in GitHub Desktop.
Save sqybi/1458071ee2757f59315cda21337b8927 to your computer and use it in GitHub Desktop.
Import Overwatch League Stats Lab CSV files into MongoDB
#!/usr/bin/env python3
import csv
from pathlib import Path

import pymongo
# Connection string of the target MongoDB server -- replace with your own URL.
DB_URL = 'mongodb://localhost:12345/'

# Root directory of the CSV exports. Each input file is looked up as
# <DATA_BASE_PATH>/<year>/<DATA_FILE_FORMAT>, for example
# ../data/2019/phs_2019_stage_1.csv
DATA_BASE_PATH = Path("../data/")
DATA_FILE_FORMAT = 'phs_{year}_{stage}.csv'

# Every (year, stage) combination below is tried in turn.
DATA_YEARS = ['2019']
DATA_STAGES = [
    'stage_1',
    'stage_2',
    'stage_3',
    'stage_4',
    'playoffs',
]

# Handles for the destination: database "owl_stats", collection "raw_data".
mongo_client = pymongo.MongoClient(DB_URL)
mongo_db = mongo_client.get_database('owl_stats')
collection = mongo_db.get_collection('raw_data')
def _convert_value(raw: str):
    """Convert one CSV cell to the most specific type it represents.

    Surrounding whitespace is stripped first. A cell consisting only of
    decimal digits becomes an ``int``; decimal digits containing a single
    ``.`` become a ``float``; everything else (including signed numbers and
    exponent notation) is returned as the stripped ``str``.
    """
    text = raw.strip()
    # isdecimal() rather than isdigit(): isdigit() also accepts characters
    # such as superscript digits, which int()/float() reject with ValueError.
    if text.isdecimal():
        return int(text)
    if text.replace('.', '', 1).isdecimal():
        return float(text)
    return text


def _load_documents(file_path: Path, year: str, stage: str) -> list:
    """Parse one CSV file into a list of documents ready for insertion.

    The first row is used as the header. Every following non-blank row
    becomes a dict that starts with ``'year'`` and ``'stage'`` and then gets
    one key per header column; a CSV column that is itself named ``year`` or
    ``stage`` therefore overrides the value derived from the file name.
    Fields beyond the header length are ignored.

    Args:
        file_path: CSV file to read (UTF-8, optional BOM).
        year: Value stored under the ``'year'`` key of every document.
        stage: Value stored under the ``'stage'`` key of every document.

    Returns:
        The parsed documents; empty if the file has no header or no data rows.

    Raises:
        ValueError: If a non-blank row has fewer fields than the header.
    """
    documents = []
    # newline='' lets the csv module handle line endings itself, including
    # newlines embedded in quoted fields.
    with file_path.open(encoding='utf-8-sig', newline='') as f:
        reader = csv.reader(f)
        header = [name.strip() for name in next(reader, [])]
        if not header:
            return documents
        for row in reader:
            # Blank lines (e.g. a trailing newline at end of file) carry no data.
            if not row or (len(row) == 1 and not row[0].strip()):
                continue
            if len(row) < len(header):
                raise ValueError(
                    '{}, line {}: expected {} fields, found {}'.format(
                        file_path, reader.line_num, len(header), len(row)))
            document = {'year': year, 'stage': stage}
            # zip() stops at the header length, so surplus fields are dropped.
            for name, cell in zip(header, row):
                document[name] = _convert_value(cell)
            documents.append(document)
            if len(documents) % 10000 == 0:
                print('{} lines processed...'.format(len(documents)))
    return documents


def main() -> None:
    """Import every configured year/stage CSV file into the MongoDB collection.

    For each combination of ``DATA_YEARS`` and ``DATA_STAGES`` the matching
    file under ``DATA_BASE_PATH`` is parsed and all of its rows are uploaded
    with a single ``insert_many`` call. Files that do not exist are skipped
    silently; files without data rows are reported and skipped.
    """
    for year in DATA_YEARS:
        for stage in DATA_STAGES:
            file_name = DATA_FILE_FORMAT.format(year=year, stage=stage)
            file_path = DATA_BASE_PATH / year / file_name
            if not file_path.exists():
                continue
            print('Processing <{}>...'.format(file_path))
            results = _load_documents(file_path, year, stage)
            if not results:
                # insert_many() rejects an empty document list.
                print('No data rows found, nothing to upload.')
                continue
            print('All processed, uploading...')
            collection.insert_many(results)
            print('Uploaded!')
# Run the import only when this file is executed as a script.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment