kspeeckaert · March 14, 2016 14:32
diff --git a/sqlserver_tracefiles.py b/sqlserver_tracefiles.py
 import timeit
 from lxml import etree
 from pandas import DataFrame
 from uuid import uuid4

 # List of process attributes to retrieve
 def analyze_deadlock(elem, data_locks, data_procs):
    # Generate a UUID to identify all processes belonging to the same deadlock event
    dl_uuid = uuid4().hex
    databases = set()
    data_lock = {}
    
    data_lock['dl_uuid'] = dl_uuid
    data_lock['victim_id'] = elem.get('victim')
    
    for proc in elem.find('./process-list').iterchildren(tag='process'):
        data_proc = {}
        data_proc['dl_uuid'] = dl_uuid
        # Add all process XML attributes
        data_proc.update(proc.attrib)
        
        # Look for the SQL statement in the executionStack/frame element
        # Disregard SQL statements with placeholders (e.g. @1)
        # Also, only consider frames with adhoc as procname (not unknown or anything else)
        for frame in proc.find('./executionStack').iterchildren(tag='frame'):
            if (frame.get('procname') == 'adhoc') and ('@1' not in frame.text):
                data_proc['sql'] = frame.text.strip()
                break
        else:
            # If no SQL statement could be found, take the SQL statement from the 
            # inputbuf element
            data_proc['sql'] = proc.find('./inputbuf').text.strip()
        
        data_procs.append(data_proc)
        databases.add(proc.get('currentdb'))
        
    data_lock['db_count'] = len(databases)
    data_locks.append(data_lock)

 def fast_iter(context):
    data_locks = []
    data_procs = []
    for event, elem in context:
        analyze_deadlock(elem, data_locks, data_procs)
        elem.clear()
        while elem.getprevious() is not None:
            del elem.getparent()[0]
    del context
    return (data_locks, data_procs)

 # Parse the XML file
 infile = 'hugetrace.xdl'

 start_time = timeit.default_timer()
 context = etree.iterparse(infile, events=('end',), tag=('deadlock'))
 data_locks, data_procs = fast_iter(context)
 end_time = timeit.default_timer()
 print('Analyzed in {} seconds'.format(end_time - start_time))

 # Convert to DataFrames
 df_procs = DataFrame(data_procs)
 df_locks = DataFrame(data_locks)
 df_procs.fillna('', inplace=True)
	import timeit
	from lxml import etree
	from pandas import DataFrame
	from uuid import uuid4

	# List of process attributes to retrieve
	def analyze_deadlock(elem, data_locks, data_procs):
	# Generate a UUID to identify all processes belonging to the same deadlock event
	dl_uuid = uuid4().hex
	databases = set()
	data_lock = {}

	data_lock['dl_uuid'] = dl_uuid
	data_lock['victim_id'] = elem.get('victim')

	for proc in elem.find('./process-list').iterchildren(tag='process'):
	data_proc = {}
	data_proc['dl_uuid'] = dl_uuid
	# Add all process XML attributes
	data_proc.update(proc.attrib)

	# Look for the SQL statement in the executionStack/frame element
	# Disregard SQL statements with placeholders (e.g. @1)
	# Also, only consider frames with adhoc as procname (not unknown or anything else)
	for frame in proc.find('./executionStack').iterchildren(tag='frame'):
	if (frame.get('procname') == 'adhoc') and ('@1' not in frame.text):
	data_proc['sql'] = frame.text.strip()
	break
	else:
	# If no SQL statement could be found, take the SQL statement from the
	# inputbuf element
	data_proc['sql'] = proc.find('./inputbuf').text.strip()

	data_procs.append(data_proc)
	databases.add(proc.get('currentdb'))

	data_lock['db_count'] = len(databases)
	data_locks.append(data_lock)

	def fast_iter(context):
	data_locks = []
	data_procs = []
	for event, elem in context:
	analyze_deadlock(elem, data_locks, data_procs)
	elem.clear()
	while elem.getprevious() is not None:
	del elem.getparent()[0]
	del context
	return (data_locks, data_procs)

	# Parse the XML file
	infile = 'hugetrace.xdl'

	start_time = timeit.default_timer()
	context = etree.iterparse(infile, events=('end',), tag=('deadlock'))
	data_locks, data_procs = fast_iter(context)
	end_time = timeit.default_timer()
	print('Analyzed in {} seconds'.format(end_time - start_time))

	# Convert to DataFrames
	df_procs = DataFrame(data_procs)
	df_locks = DataFrame(data_locks)
	df_procs.fillna('', inplace=True)