Last active
July 10, 2025 21:15
-
-
Save fomightez/f036794b91d10761466341644b3c1cac to your computer and use it in GitHub Desktop.
Evaluating date timestamp info in typical long and short read pipeline.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# strptime layout of the timestamps written to the pipeline's stdout log.
_TIMESTAMP_FORMAT = "%Y-%m-%d_%H-%M-%S"


def _extract_timestamp(log_text, marker):
    '''
    Return the text that follows the first occurrence of `marker` in
    `log_text`, up to the end of that line, with whitespace stripped.

    Raises IndexError (the same type the bare `split(...)[1]` idiom raises)
    with a descriptive message when the marker is absent.
    '''
    pieces = log_text.split(marker)
    if len(pieces) < 2:
        raise IndexError(f"marker {marker!r} not found in log text")
    return pieces[1].split('\n')[0].strip()


def _format_minutes(total_minutes):
    '''
    Render a whole number of minutes as `Nm`, adding an `(Hh Mm)` breakdown
    once the duration reaches a full hour.
    '''
    if total_minutes >= 60:
        hours, mins = divmod(total_minutes, 60)
        return f"{total_minutes}m ({hours}h {mins}m)"
    return f"{total_minutes}m"


def collect_time_info(input_text_filepath):
    '''
    Print how long each stage of a pipeline run took, based on the three
    timestamps recorded in its stdout log (e.g. `logs/???????_<accession>.out`).

    The log at `input_text_filepath` must contain lines starting with:
      - `Current timestamp at start: `
      - `Current timestamp before other steps but after fastq obtained: `
      - `Current timestamp after: `
    each followed by a timestamp formatted as `%Y-%m-%d_%H-%M-%S`.

    Three lines are printed: total time, download time, and main processing
    time, each rounded to whole minutes. Nothing is returned.

    Raises IndexError if a marker is missing from the log, and ValueError if
    a timestamp does not match the expected format. All three timestamps are
    validated before anything is printed.
    '''
    # Imported here so the function also works when this module is imported,
    # not only when it is run as a script.
    from datetime import datetime

    with open(input_text_filepath, 'r') as thelog_stdout_file:
        std_out_string = thelog_stdout_file.read()

    # With the stdout log read in, pull out the three raw timestamps first,
    # so a missing marker is reported before any parsing is attempted.
    start_ts = _extract_timestamp(
        std_out_string, 'Current timestamp at start: ')
    after_data_obtained_ts = _extract_timestamp(
        std_out_string,
        'Current timestamp before other steps but after fastq obtained: ')
    after_main_events_ts = _extract_timestamp(
        std_out_string, 'Current timestamp after: ')

    started = datetime.strptime(start_ts, _TIMESTAMP_FORMAT)
    data_obtained = datetime.strptime(after_data_obtained_ts, _TIMESTAMP_FORMAT)
    finished = datetime.strptime(after_main_events_ts, _TIMESTAMP_FORMAT)

    # (label, interval start, interval end); labels are emitted verbatim.
    stages = (
        ("Total time processing run", started, finished),
        ("Download time", started, data_obtained),
        ("Main processing post-dowbload", data_obtained, finished),
    )
    for label, begin, end in stages:
        minutes_diff = round((end - begin).total_seconds() / 60)
        print(f"{label}: {_format_minutes(minutes_diff)}")
# Script entry point: expects the path of the stdout log as the first
# command-line argument and prints the timing summary for that run.
if __name__ == "__main__":
    import sys
    # Binds `datetime` as a module-level name for code in this file that
    # looks it up globally.
    from datetime import datetime
    try:
        input_text_filepath = sys.argv[1]
    except IndexError:
        import rich
        # Every closing markup tag needs a matching opening tag; an unmatched
        # `[/bold red]` makes Rich raise MarkupError instead of printing.
        rich.print(
            "\n[bold red]I suspect you forgot to specify the file to read?"
            "[/bold red]\n [bold red]**EXITING !!**[/bold red]\n"
        )
        sys.exit(1)
    # NOTE(review): pandas and openpyxl are not used by the code visible in
    # this file; the imports are retained in case other code relies on them.
    import pandas as pd
    from openpyxl import Workbook
    collect_time_info(input_text_filepath)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment