Message Manager Logging
#!/usr/bin/env python3
# First MOZ_LOG="MessageManager:5" ./mach run > mmlog.txt 2>&1
# Then ./mmseparate.py mmlog.txt

import os
import re
import sys
import time
import pathlib
import argparse

import jsbeautifier

opts = jsbeautifier.default_options()
opts.indent_size = 2

LINE_PARSE = re.compile(r"\[([a-zA-Z]+) ([0-9]+): ([a-zA-Z ]+)\]: ([DV])/MessageManager ([-0-9]+) (.+)$")
TOPIC_PARSE = re.compile(r"(.+) Message: (.+) in process type: .+")

class MMLine():
    # A single MessageManager log entry. A Debug ("D") line carries the
    # callsite and topic; the matching Verbose ("V") line, correlated by
    # msg_id, carries the serialized message payload.
    def _parse(self, l):
        m = LINE_PARSE.match(l)
        if not m:
            # Drop into the debugger so an unexpected log format can be inspected.
            import pdb
            pdb.set_trace()
            print("Could not parse line:", l)
            sys.exit(1)
        if len(m.groups()) != 6:
            print("Did not get correct number of line groups from:", l)
            sys.exit(1)

        assert self.process_type == m.groups()[0] or not self.process_type, "Assertion failed on (%s, %s): %s" % (self.process_type, m.groups()[0], l)
        assert self.process_pid == m.groups()[1] or not self.process_pid, "Assertion failed on (%s, %s): %s" % (self.process_pid, m.groups()[1], l)
        assert self.thread == m.groups()[2] or not self.thread, "Assertion failed on (%s, %s): %s" % (self.thread, m.groups()[2], l)
        assert self.msg_id == m.groups()[4] or not self.msg_id, "Assertion failed on (%s, %s): %s" % (self.msg_id, m.groups()[4], l)
        assert (m.groups()[3] == "D" and not self.process_type) or m.groups()[3] == "V", "Assertion failed on " + l

        self.process_type = m.groups()[0]
        self.process_pid = m.groups()[1]
        self.thread = m.groups()[2]
        self.msg_id = m.groups()[4]
        return m.groups()[3], m.groups()[5]

    def __init__(self, l):
        self.process_type = ""
        self.process_pid = ""
        self.thread = ""
        self.msg_id = ""
        self.verbosity = ""
        self.content = ""
        self.callsite = ""
        self.topic = ""

        verbosity, payload = self._parse(l)
        if verbosity == "D":
            self.type = "debug"
            m = TOPIC_PARSE.search(payload)
            if not m or len(m.groups()) != 2:
                print("Did not get correct number of topic groups from:", payload)
                sys.exit(1)
            self.callsite = m.groups()[0]
            self.topic = m.groups()[1]
        else:
            self.type = "verbose"

    def add_content(self, l):
        # Attach the payload from the correlated Verbose line.
        self.message = self._parse(l)[1]

def process(filename, output_dir):
    # First separate the lines by pid because we have interleaving of log lines
    lines_by_pid = {}
    # Used to map pids to a process type (e.g. parent)
    pid_to_type = {}
    # Used to associate the Verbose messages to Debug temporarily
    tmp_correlation_bucket = {}
    # Used to store the data
    data_by_pid = {}
    data_by_topic = {}

    with open(filename, "r") as f:
        for l in f:
            if "MessageManager" not in l:
                continue
            if not l.strip():
                continue

            line = MMLine(l)
            if line.process_pid not in lines_by_pid:
                lines_by_pid[line.process_pid] = []
                data_by_pid[line.process_pid] = []
                pid_to_type[line.process_pid] = line.process_type
            if line.topic and line.topic not in data_by_topic:
                data_by_topic[line.topic] = []

            lines_by_pid[line.process_pid].append(l)

    # Second pass: correlate each Debug line with the Verbose line that
    # carries its payload (matched by msg_id), per pid.
    for pid in lines_by_pid:
        print("Processing %s (pid %s)" % (pid_to_type[pid], pid))
        lines = iter(lines_by_pid[pid])
        for l in lines:
            line = MMLine(l)
            if line.type == "debug":
                tmp_correlation_bucket[line.msg_id] = line
            else:
                tmp_correlation_bucket[line.msg_id].add_content(l)
                line = tmp_correlation_bucket[line.msg_id]
                data_by_pid[pid].append(line)
                data_by_topic[line.topic].append(line)

    for pid in data_by_pid:
        process_type = pid_to_type[pid]
        output_file = os.path.join(output_dir, "%s.%s.log" % (process_type, pid))
        print("Outputting all data for %s (pid %s) to %s" % (process_type, pid, output_file))

        data = data_by_pid[pid]
        with open(output_file, "w") as f:
            for d in data:
                f.write("------------------------------\n")
                f.write("%s received in %s\n" % (d.topic, d.callsite))
                message = jsbeautifier.beautify(d.message, opts)
                f.write(" " + message.replace("\n", "\n ") + "\n\n")

    for topic in data_by_topic:
        data = data_by_topic[topic]
        process_types = {}
        callsites = {}
        for d in data:
            if d.process_type not in process_types:
                process_types[d.process_type] = 0
            if d.callsite not in callsites:
                callsites[d.callsite] = 0
            process_types[d.process_type] += 1
            callsites[d.callsite] += 1

        output_file = os.path.join(output_dir, "topic.%s.log" % (topic))
        print("Outputting all data for topic '%s' to %s" % (topic, output_file))
        if len(callsites) > 1:
            print(" Unusually, this topic was received in %s different callsites." % len(callsites))
        if len(process_types) > 1:
            print(" Unusually, this topic was received in %s different process types." % len(process_types))

        with open(output_file, "w") as f:
            f.write("Process Breakdown:\n")
            for t in process_types:
                f.write(" %s: %s\n" % (t, process_types[t]))
            f.write("\n------------------------------\n")
            f.write("Callsite Breakdown:\n")
            for c in callsites:
                f.write(" %s: %s\n" % (c, callsites[c]))
            f.write("\n------------------------------\n")
            for d in data:
                f.write("------------------------------\n")
                f.write("Received in %s in process %s (pid %s)\n" % (d.callsite, d.process_type, d.process_pid))
                message = jsbeautifier.beautify(d.message, opts)
                f.write(" " + message.replace("\n", "\n ") + "\n\n")

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("-o", "--out-dir", default="mm-sorted-%s" % int(time.time()), help="Directory to write output logs in.")
    parser.add_argument("log_files", nargs="+", help="Log files to parse.")
    args = parser.parse_args()
    if len(args.log_files) <= 0:
        print("Supply log files to separate on the command line.")
        sys.exit(1)

    pathlib.Path(args.out_dir).mkdir(parents=True, exist_ok=True)
    for f in args.log_files:
        print("Processing ", f)
        process(f, args.out_dir)
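
For readers unfamiliar with this MOZ_LOG output, here is a minimal, self-contained sketch of how the two regular expressions above decompose a single MessageManager line. The sample line, its callsite ("SomeCallsite"), and its topic ("Browser:Example") are hypothetical, constructed only to match the patterns; real log output may differ.

import re

LINE_PARSE = re.compile(r"\[([a-zA-Z]+) ([0-9]+): ([a-zA-Z ]+)\]: ([DV])/MessageManager ([-0-9]+) (.+)$")
TOPIC_PARSE = re.compile(r"(.+) Message: (.+) in process type: .+")

# Hypothetical Debug ("D") line for illustration.
sample = ("[Parent 12345: Main Thread]: D/MessageManager 42 "
          "SomeCallsite Message: Browser:Example in process type: default")

process_type, pid, thread, verbosity, msg_id, payload = LINE_PARSE.match(sample).groups()
callsite, topic = TOPIC_PARSE.search(payload).groups()

print(process_type, pid, thread, verbosity, msg_id)  # Parent 12345 Main Thread D 42
print(callsite, topic)                               # SomeCallsite Browser:Example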