Yvand · January 31, 2025 16:42
diff --git a/parse-dsc-logs.py b/parse-dsc-logs.py
 #!/usr/bin/env python

 # This script parses log files generated by the Azure DSC extension to compute the time taken by each resource to apply its configuration.
 # It prints the result and writes a semicolon-delimited dsc-resources-time-taken-{hostname}.csv file in the path specified
 # Logs of the Azure DSC extension are typically located in C:\WindowsAzure\Logs\Plugins\Microsoft.Powershell.DSC\2.83.1.0

 # For each resource, it detects the End Set with the time taken, as below:
 # VERBOSE: [2025-01-17 10:21:04Z] [VERBOSE] [SP]: LCM:  [ End    Set      ]  [[cChocoPackageInstaller]InstallAzureDataStudio]  in 9.5250 seconds.
 # VERBOSE: [2025-01-17 10:21:48Z] [VERBOSE] [SP]: LCM:  [ End    Set      ]  [[ADObjectPermissionEntry]GrantReplicatingDirectoryChanges]  in 0.2660 seconds.

 # usage: python3 parse-dsc-logs.py /path/to/log/files/dir

 import os
 import glob
 import io
 import re
 from sys import argv
 import csv
 import socket

 # Exactly one argument is expected: the directory that contains the DSC log files.
 # Exit with the usage line instead of an opaque "not enough values to unpack" error.
 if len(argv) != 2:
    raise SystemExit("usage: python3 parse-dsc-logs.py /path/to/log/files/dir")
 script, path = argv
 # path = '~/dev/data/dsc/dsc-dc-WS2025-slow'
 # Maps each resource name to a "name;total time;time per run" record
 timeTakenPerResource = {}

 # Process a specific file
 def parseFile(file: io.TextIOWrapper):
    """Scan an open DSC log file and pass every "End Set" entry to parseEndSetLine.

    An entry is complete when it ends with "in xx.xx seconds.". When it does
    not, its text continues on the next physical line, and both lines are
    joined before being parsed.

    Args:
        file: Open text stream on a log file. Its ``name`` attribute is printed
            and its lines are read in order.
    """
    print(f"Parsing log file \"{file.name}\"...")
    # Compiled once per file rather than looked up for every line
    endSetPattern = re.compile(r"\[\sEnd\s*Set\s*\]")
    # The decimal point is escaped, and trailing whitespace is tolerated so it
    # cannot make a complete entry look wrapped
    completePattern = re.compile(r"in\s*\d+(?:\.\d+)?\s*seconds\.\s*$")
    pendingLine = None  # first half of an entry wrapped onto the next line
    for line in file:
        if pendingLine is not None:
            parseEndSetLine(pendingLine + line)
            pendingLine = None

        # Test if current line is an "End Set" resource
        if endSetPattern.search(line) is None:
            continue
        if completePattern.search(line):
            parseEndSetLine(line)
        else:
            pendingLine = line.rstrip('\n')

    # An entry still pending at end of file has no continuation: parse what is there
    if pendingLine is not None:
        parseEndSetLine(pendingLine)
        
 # Process the end set line to extract the resource name and the time taken
 def parseEndSetLine(endSetResourceLine: str):
    """Extract the resource name and duration of an "End Set" entry and accumulate them.

    The entry is expected to look like:
        [ End    Set      ]  [[cChocoPackageInstaller]InstallAzureDataStudio]  in 9.5250 seconds.
    Text that does not match this shape is ignored.

    The result is stored in the module-level timeTakenPerResource dict, keyed
    by resource name, as a "name;total time;time per run" string in which
    "time per run" lists every individual duration joined by " + ".

    Args:
        endSetResourceLine: Text of the log entry (already joined when the
            entry was wrapped over two lines).
    """
    # The duration must be a real decimal number because it is passed to float() below
    regexResult = re.search(
        r"\[\sEnd\s*Set\s*\].*(\[\[.*\].*\]).*in\s*(\d+(?:\.\d+)?) seconds",
        endSetResourceLine,
    )
    if regexResult is None:
        return
    resourceName, timeTaken = regexResult.groups()

    # 1 resource may have multiple "end set" lines
    previousRecord = timeTakenPerResource.get(resourceName)
    if previousRecord is None:
        totalTime = timeTaken
        timeTakenPerRun = timeTaken
    else:
        # Split from the right so a ";" inside the resource name cannot shift the fields
        _, previousTotal, previousRuns = str(previousRecord).rsplit(";", 2)
        # The sample entries carry 4 decimals; rounding to 4 removes binary
        # floating-point noise such as 0.30000000000000004
        totalTime = round(float(previousTotal) + float(timeTaken), 4)
        timeTakenPerRun = "{0} + {1}".format(previousRuns, timeTaken)
    timeTakenPerResource[resourceName] = "{0};{1};{2}".format(resourceName, totalTime, timeTakenPerRun)

 if __name__ == "__main__":
    # Entry point: parse every DSC extension log found in `path`, then print
    # and export the time taken by each resource.
    print(f"Processing log files in path \"{path}\"...")
    # Files must be sorted by name to process the oldest first
    logFiles = sorted(glob.glob(os.path.join(path, 'DscExtensionHandler*.log')))
    for logFile in logFiles:
        # glob already returns paths that include `path`, so they open as-is
        with open(logFile, 'r') as f: # open in readonly mode
            parseFile(f)

    filename = "dsc-resources-time-taken-{}.csv".format(socket.gethostname())
    with open(os.path.join(path, filename), 'w', newline='') as csvfile:
        writer = csv.writer(csvfile, delimiter=";")
        for key, value in timeTakenPerResource.items():
            # Each record is "name;total time;time per run". Split from the
            # right so a ";" inside the name stays in the name column.
            resourceData = str(value).rsplit(";", 2)
            print(f"\"{key}\" applied its configuration in {resourceData[1]} seconds.")
            writer.writerow(resourceData)

    print('Finished.')
	#!/usr/bin/env python

	# This script parses log files generated by the Azure DSC extension to compute the time taken by each resource to apply its configuration.
	# It prints the result and writes a semicolon-delimited dsc-resources-time-taken-{hostname}.csv file in the path specified
	# Logs of the Azure DSC extension are typically located in C:\WindowsAzure\Logs\Plugins\Microsoft.Powershell.DSC\2.83.1.0

	# For each resource, it detects the End Set with the time taken, as below:
	# VERBOSE: [2025-01-17 10:21:04Z] [VERBOSE] [SP]: LCM: [ End Set ] [[cChocoPackageInstaller]InstallAzureDataStudio] in 9.5250 seconds.
	# VERBOSE: [2025-01-17 10:21:48Z] [VERBOSE] [SP]: LCM: [ End Set ] [[ADObjectPermissionEntry]GrantReplicatingDirectoryChanges] in 0.2660 seconds.

	# usage: python3 parse-dsc-logs.py /path/to/log/files/dir

	import os
	import glob
	import io
	import re
	from sys import argv
	import csv
	import socket

	# Exactly one argument is expected: the directory that contains the DSC log files.
	# Exit with the usage line instead of an opaque "not enough values to unpack" error.
	if len(argv) != 2:
	    raise SystemExit("usage: python3 parse-dsc-logs.py /path/to/log/files/dir")
	script, path = argv
	# path = '~/dev/data/dsc/dsc-dc-WS2025-slow'
	# Maps each resource name to a "name;total time;time per run" record
	timeTakenPerResource = {}

	# Process a specific file
	def parseFile(file: io.TextIOWrapper):
	    """Scan an open DSC log file and pass every "End Set" entry to parseEndSetLine.

	    An entry is complete when it ends with "in xx.xx seconds.". When it does
	    not, its text continues on the next physical line, and both lines are
	    joined before being parsed.

	    Args:
	        file: Open text stream on a log file. Its ``name`` attribute is printed
	            and its lines are read in order.
	    """
	    print(f"Parsing log file \"{file.name}\"...")
	    # "\s*" is required: the log pads "[ End    Set      ]" with several spaces
	    endSetPattern = re.compile(r"\[\sEnd\s*Set\s*\]")
	    # The decimal point is escaped, and trailing whitespace is tolerated so it
	    # cannot make a complete entry look wrapped
	    completePattern = re.compile(r"in\s*\d+(?:\.\d+)?\s*seconds\.\s*$")
	    pendingLine = None  # first half of an entry wrapped onto the next line
	    for line in file:
	        if pendingLine is not None:
	            parseEndSetLine(pendingLine + line)
	            pendingLine = None

	        # Test if current line is an "End Set" resource
	        if endSetPattern.search(line) is None:
	            continue
	        if completePattern.search(line):
	            parseEndSetLine(line)
	        else:
	            pendingLine = line.rstrip('\n')

	    # An entry still pending at end of file has no continuation: parse what is there
	    if pendingLine is not None:
	        parseEndSetLine(pendingLine)

	# Process the end set line to extract the resource name and the time taken
	def parseEndSetLine(endSetResourceLine: str):
	    """Extract the resource name and duration of an "End Set" entry and accumulate them.

	    The entry is expected to look like:
	        [ End    Set      ]  [[cChocoPackageInstaller]InstallAzureDataStudio]  in 9.5250 seconds.
	    Text that does not match this shape is ignored.

	    The result is stored in the module-level timeTakenPerResource dict, keyed
	    by resource name, as a "name;total time;time per run" string in which
	    "time per run" lists every individual duration joined by " + ".

	    Args:
	        endSetResourceLine: Text of the log entry (already joined when the
	            entry was wrapped over two lines).
	    """
	    # The ".*" / "\s*" quantifiers let the pattern span padding and names of
	    # any length. The duration must be a real decimal number because it is
	    # passed to float() below.
	    regexResult = re.search(
	        r"\[\sEnd\s*Set\s*\].*(\[\[.*\].*\]).*in\s*(\d+(?:\.\d+)?) seconds",
	        endSetResourceLine,
	    )
	    if regexResult is None:
	        return
	    resourceName, timeTaken = regexResult.groups()

	    # 1 resource may have multiple "end set" lines
	    previousRecord = timeTakenPerResource.get(resourceName)
	    if previousRecord is None:
	        totalTime = timeTaken
	        timeTakenPerRun = timeTaken
	    else:
	        # Split from the right so a ";" inside the resource name cannot shift the fields
	        _, previousTotal, previousRuns = str(previousRecord).rsplit(";", 2)
	        # The sample entries carry 4 decimals; rounding to 4 removes binary
	        # floating-point noise such as 0.30000000000000004
	        totalTime = round(float(previousTotal) + float(timeTaken), 4)
	        timeTakenPerRun = "{0} + {1}".format(previousRuns, timeTaken)
	    timeTakenPerResource[resourceName] = "{0};{1};{2}".format(resourceName, totalTime, timeTakenPerRun)

	if __name__ == "__main__":
	    # Entry point: parse every DSC extension log found in `path`, then print
	    # and export the time taken by each resource.
	    print(f"Processing log files in path \"{path}\"...")
	    # Files must be sorted by name to process the oldest first
	    logFiles = sorted(glob.glob(os.path.join(path, 'DscExtensionHandler*.log')))
	    for logFile in logFiles:
	        # glob already returns paths that include `path`, so they open as-is
	        with open(logFile, 'r') as f: # open in readonly mode
	            parseFile(f)

	    filename = "dsc-resources-time-taken-{}.csv".format(socket.gethostname())
	    with open(os.path.join(path, filename), 'w', newline='') as csvfile:
	        writer = csv.writer(csvfile, delimiter=";")
	        for key, value in timeTakenPerResource.items():
	            # Each record is "name;total time;time per run". Split from the
	            # right so a ";" inside the name stays in the name column.
	            resourceData = str(value).rsplit(";", 2)
	            print(f"\"{key}\" applied its configuration in {resourceData[1]} seconds.")
	            writer.writerow(resourceData)

	    print('Finished.')