Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save fomightez/f036794b91d10761466341644b3c1cac to your computer and use it in GitHub Desktop.
Save fomightez/f036794b91d10761466341644b3c1cac to your computer and use it in GitHub Desktop.
Evaluating date timestamp info in typical long and short read pipeline.
def collect_time_info(input_text_filepath):
    '''
    Print how long each stage of a pipeline run took, based on its log.

    Reads the standard-output log at `input_text_filepath` (for example
    `logs/???????_<accession>.out`), finds the three timestamps recorded in
    it, and prints the elapsed time, rounded to whole minutes, for:

    * the total run (start -> after main events),
    * the download (start -> after fastq obtained),
    * the main processing after the download.

    Durations of an hour or more also get an `(<h>h <m>m)` breakdown.

    Returns None; the results are only printed.

    Raises ValueError if one of the timestamp labels is missing from the
    log, or if a timestamp is not in `YYYY-MM-DD_HH-MM-SS` format.
    '''
    with open(input_text_filepath, 'r') as thelog_stdout_file:
        std_out_string = thelog_stdout_file.read()
    # With the log read in, parse the three timestamps up front so a
    # malformed log fails before any partial report is printed.
    start = _read_timestamp(std_out_string, 'Current timestamp at start: ')
    after_data_obtained = _read_timestamp(
        std_out_string,
        'Current timestamp before other steps but after fastq obtained: ')
    after_main_events = _read_timestamp(
        std_out_string, 'Current timestamp after: ')
    _report_duration("Total time processing run", start, after_main_events)
    _report_duration("Download time", start, after_data_obtained)
    _report_duration(
        "Main processing post-download", after_data_obtained,
        after_main_events)


# Layout of the timestamps written to the log, e.g. 2024-01-31_23-59-59.
_TIMESTAMP_FORMAT = "%Y-%m-%d_%H-%M-%S"


def _read_timestamp(log_text, label):
    '''Return, as a datetime, the timestamp after the first `label`.'''
    # Imported locally so this works when the file is imported as a module,
    # not only when it is run as a script.
    from datetime import datetime
    _, found, remainder = log_text.partition(label)
    if not found:
        raise ValueError(f"timestamp label {label!r} not found in the log")
    # The timestamp is whatever remains on the label's line.
    stamp = remainder.split('\n')[0].strip()
    return datetime.strptime(stamp, _TIMESTAMP_FORMAT)


def _report_duration(label, begin, end):
    '''Print the time from `begin` to `end` in whole minutes.'''
    total_minutes = round((end - begin).total_seconds() / 60)
    # `>=` so that a run of exactly one hour also gets the h/m breakdown.
    if total_minutes >= 60:
        hours, mins = divmod(total_minutes, 60)
        print(f"{label}: {total_minutes}m ({hours}h {mins}m)")
    else:
        print(f"{label}: {total_minutes}m")
if __name__ == "__main__":
    # Script entry point: expects the path of the log file to analyse as
    # the first command-line argument.
    import sys
    # Bound at module level so the functions defined in this file can use
    # the name `datetime`.
    from datetime import datetime
    try:
        input_text_filepath = sys.argv[1]
    except IndexError:
        import rich
        # Every closing tag needs a matching opening tag; an unmatched
        # `[/bold red]` makes rich raise MarkupError instead of printing.
        rich.print(
            "\n[bold red]I suspect you forgot to specify the file to read?"
            "[/bold red]\n [bold red]**EXITING !!**[/bold red]\n"
        )
        sys.exit(1)
    # NOTE(review): pandas and openpyxl are not used by the code visible
    # here; kept in case other parts of the workflow rely on them.
    import pandas as pd
    from openpyxl import Workbook
    collect_time_info(input_text_filepath)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment