Last active
January 31, 2025 16:42
-
-
Save Yvand/777a2e97c5d07198b926d7bb4f12ab04 to your computer and use it in GitHub Desktop.
Parse log files generated by the Azure DSC extension to compute the time taken by each resource to apply its configuration
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# This script parses log files generated by the Azure DSC extension to compute the time taken by each resource to apply its configuration.
# It prints the result and generates a dsc-resources-time-taken-<hostname>.csv file in the path specified.
# Logs of the Azure DSC extension are typically located in C:\WindowsAzure\Logs\Plugins\Microsoft.Powershell.DSC\2.83.1.0
# For each resource, it detects the End Set with the time taken, as below:
# VERBOSE: [2025-01-17 10:21:04Z] [VERBOSE] [SP]: LCM: [ End Set ] [[cChocoPackageInstaller]InstallAzureDataStudio] in 9.5250 seconds.
# VERBOSE: [2025-01-17 10:21:48Z] [VERBOSE] [SP]: LCM: [ End Set ] [[ADObjectPermissionEntry]GrantReplicatingDirectoryChanges] in 0.2660 seconds.
# usage: python3 parse-dsc-logs.py /path/to/log/files/dir
import csv
import glob
import io
import os
import re
import socket
from decimal import Decimal
from sys import argv
# Exactly one command-line argument is expected: the directory holding the log files.
# Exit with a usage message rather than an opaque tuple-unpacking ValueError.
if len(argv) != 2:
    raise SystemExit("usage: python3 parse-dsc-logs.py /path/to/log/files/dir")
script, path = argv
# Example value: path = '~/dev/data/dsc/dsc-dc-WS2025-slow'

# Results shared by the parsing functions and the CSV writer.
# Key: resource identifier as captured from the log, e.g. "[[cChocoPackageInstaller]InstallAzureDataStudio]".
# Value: one string "<resource>;<total seconds>;<time of each run joined with ' + '>".
timeTakenPerResource = {}
# Process a specific file
def parseFile(file: io.TextIOWrapper):
    """Scan one DSC log file and pass every "End Set" entry to parseEndSetLine.

    An entry normally sits on a single line that ends with "in xx.xx seconds.".
    When that ending is missing, the entry is assumed to continue on the next
    physical line, and the two lines are joined before being parsed. Only one
    continuation line is ever joined, so an "End Set" line that never reports
    a time cannot swallow the rest of the file.

    file: an open text file; its name is printed and its lines are read in order.
    """
    print(f"Parsing log file \"{file.name}\"...")

    # First physical line of an entry that is still waiting for its continuation.
    pendingFragment = None

    # Iterate lazily instead of loading the whole log with readlines().
    for line in file:
        # Test if current line is an "End Set" resource
        startsEndSet = re.search(r"\[\sEnd\s*Set\s*\]", line) is not None

        if pendingFragment is not None:
            if startsEndSet:
                # This line opens a new entry, so it cannot be the tail of the
                # pending one. Parse the fragment by itself; joining the two
                # would let this line be counted twice (once joined, once below).
                parseEndSetLine(pendingFragment)
            else:
                parseEndSetLine(pendingFragment + line)
            pendingFragment = None

        if not startsEndSet:
            continue

        # Test if line ends with "in xx.xx seconds." (if yes, then it is complete).
        # Trailing whitespace is stripped so it cannot defeat the "$" anchor, and
        # the decimal point is escaped so it only matches a literal ".".
        lineIsComplete = re.search(r"in\s*\d+(?:\.\d+)?\s*seconds\.$", line.rstrip())
        if lineIsComplete:
            parseEndSetLine(line)
        else:
            pendingFragment = line.rstrip('\n')
# Process the end set line to extract the resource name and the time taken
def parseEndSetLine(endSetResourceLine: str):
    """Record the time reported by one "End Set" log line.

    The line must meet this pattern: "[ End Set ]" "[[resource]name]" "in xx.xx seconds", e.g.
      VERBOSE: [2025-01-17 10:21:04Z] [VERBOSE] [SP]: LCM: [ End Set ] [[cChocoPackageInstaller]InstallAzureDataStudio] in 9.5250 seconds.
      VERBOSE: [2025-01-17 10:21:48Z] [VERBOSE] [SP]: LCM: [ End Set ] [[ADObjectPermissionEntry]GrantReplicatingDirectoryChanges] in 0.2660 seconds.
    Lines that do not match are ignored.

    The result is stored in the module-level dict timeTakenPerResource, keyed
    by the "[[resource]name]" text, as the string
    "<resource>;<total seconds>;<run 1> + <run 2> + ...".

    endSetResourceLine: the (possibly re-joined) log line to inspect.
    Returns None.
    """
    # The time must be a real number: at least one digit and a literal decimal
    # point. The unescaped "." used previously accepted any character.
    match = re.search(
        r"\[\sEnd\s*Set\s*\].*(\[\[.*\].*\]).*in\s*(\d+(?:\.\d+)?) seconds",
        endSetResourceLine,
    )
    if match is None:
        return
    resourceName, timeTaken = match.groups()

    previousRecord = timeTakenPerResource.get(resourceName)
    if previousRecord is None:
        timeTakenPerResource[resourceName] = "{0};{1};{2}".format(resourceName, timeTaken, timeTaken)
        return

    # 1 resource may have multiple "end set" lines: accumulate them.
    # Split from the right so the last two fields are always total and runs.
    _, totalSoFar, runsSoFar = str(previousRecord).rsplit(";", 2)
    # Decimal keeps the digits exactly as logged; binary floats would produce
    # totals such as 0.30000000000000004 in the report.
    totalTime = Decimal(totalSoFar) + Decimal(timeTaken)
    timeTakenPerRun = "{0} + {1}".format(runsSoFar, timeTaken)
    # The "f" format keeps fixed-point notation whatever the magnitude.
    timeTakenPerResource[resourceName] = "{0};{1:f};{2}".format(resourceName, totalTime, timeTakenPerRun)
if __name__ == "__main__":
    # Entry point: parse every matching log in `path`, then print the totals and write them to a CSV.
    print(f"Processing log files in path \"{path}\"...")

    # Sorting by file name makes the oldest log come first.
    logPattern = os.path.join(path, 'DscExtensionHandler*.log')
    logFiles = glob.glob(logPattern)
    logFiles.sort()
    for logFile in logFiles:
        logLocation = os.path.join(os.getcwd(), logFile)
        with open(logLocation, 'r') as logHandle:  # read-only
            parseFile(logHandle)

    # The report is written next to the logs and named after the local host.
    reportName = "dsc-resources-time-taken-{}.csv".format(socket.gethostname())
    reportLocation = os.path.join(path, reportName)
    with open(reportLocation, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile, delimiter=";")
        for resourceKey, record in timeTakenPerResource.items():
            # Each record is a ";"-separated string; field 1 holds the total time.
            fields = str(record).split(";")
            print(f"\"{resourceKey}\" applied its configuration in {fields[1]} seconds.")
            writer.writerow(fields)
    print('Finished.')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment