Created
December 13, 2019 11:32
-
-
Save snopoke/6e862fc16320ade2eb6548d1ba519a69 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv
import gzip
import os
import shutil
from collections import Counter
from datetime import datetime, timedelta

from elasticsearch.exceptions import ConnectionTimeout

from corehq.apps.es import FormES
from corehq.util.timezones.utils import parse_date

# Export, hour by hour, the completion and receipt timestamps of every
# 'icds-cas' form, writing one gzipped CSV of raw rows per day plus a per-day
# histogram of (received date - completed date) in days.
path = '/home/cchq/form_dates'

# Start every run from an empty output directory; a missing directory is fine.
try:
    shutil.rmtree(path)
except FileNotFoundError:
    pass
os.makedirs(path)


def _write_day_summary(day, summary):
    """Write the days_diff histogram collected for `day` to its summary CSV.

    `day` is the date the counted forms were completed on; it names the file
    (``YYYY-MM-DD_summary.csv``). `summary` maps days_diff -> form count.
    """
    summary_filename = f"{day.strftime('%Y-%m-%d')}_summary.csv"
    print(f' Writing summary {summary_filename}')
    # newline='' is what the csv module requires for files handed to a writer.
    with open(os.path.join(path, summary_filename), 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['days_diff', 'form_count'])
        writer.writerows(sorted(summary.items()))


gstart = datetime.utcnow()
start = datetime(2019, 10, 1)
tc = 0
day_summary = Counter()
# The calendar day that `day_summary` currently describes. Tracking it
# explicitly lets the summary be filed under the day the data belongs to,
# rather than under the day that happens to be starting when it is flushed.
summary_day = start.date()

# NOTE(review): `end` is not defined anywhere in the visible script. It must
# be bound (to a naive datetime, since it is compared with `start`) before
# this loop runs, otherwise the loop raises NameError.
while start < end:
    if start.date() != summary_day:
        # Crossed into a new day: flush the finished day's histogram.
        if day_summary:
            _write_day_summary(summary_day, day_summary)
        day_summary = Counter()
        summary_day = start.date()

    all_data_filename = f"{start.strftime('%Y-%m-%d')}.csv.gz"
    all_data_file = os.path.join(path, all_data_filename)
    # Only the first batch appended to a day's file carries the header row.
    all_data_headers = []
    if not os.path.isfile(all_data_file):
        all_data_headers = ['form_id', 'completed_on', 'received_on', 'days_diff']

    se = start + timedelta(hours=1)
    rows = []
    try:
        forms = (
            FormES()
            .domain('icds-cas')
            .completed(gte=start, lt=se)
            .source(['form.meta.timeEnd', 'received_on'])
            .scroll()
        )
        for form in forms:
            completed_on = form['form']['meta']['timeEnd']
            received_on = form['received_on']
            days_diff = parse_date(received_on).date() - parse_date(completed_on).date()
            rows.append([form['_id'], completed_on, received_on, days_diff.days])
    except ConnectionTimeout:
        # `start` is not advanced and nothing has been written yet, so the
        # same hour is re-queried from scratch on the next iteration.
        print(f' retrying batch: {start} to {se}')
        continue

    with gzip.open(all_data_file, 'at', newline='') as f:
        writer = csv.writer(f)
        if all_data_headers:
            writer.writerow(all_data_headers)
        writer.writerows(rows)

    count = len(rows)
    day_summary.update(row[3] for row in rows)
    tc += count
    print(f'[{datetime.utcnow()}] Runtime: {datetime.utcnow() - gstart}, Progress: {start} to {se}: {count} ({tc})')
    start = se

# The loop only flushes a day when the next one begins, so the final day's
# histogram has to be written here.
if day_summary:
    _write_day_summary(summary_day, day_summary)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment