Skip to content

Instantly share code, notes, and snippets.

@djinn
Created March 26, 2025 11:35
Show Gist options
  • Save djinn/d617973c08f743a11eda421d23ae2a1c to your computer and use it in GitHub Desktop.
Save djinn/d617973c08f743a11eda421d23ae2a1c to your computer and use it in GitHub Desktop.
Visualizing the GC logs for G1 activities
import matplotlib.pyplot as plt
import pandas as pd
import re
def parse_gc_log(gc_log):
# Split the input log by lines
lines = gc_log.split("\n")
# Prepare lists for each graph's data
gc_timing = []
heap_regions = {'Eden': [], 'Survivor': [], 'Old': [], 'Humongous': []}
metaspace = {'Metaspace': [], 'NonClass': [], 'Class': []}
pause_details = []
system_details = []
start_time = None # To store the timestamp of the first event
# Parse each line in the log
for line in lines:
if 'GC' in line:
# Extract timestamp if available (example pattern: [2025-03-25 10:15:32])
timestamp_match = re.search(r'\[(.*?)\]', line)
if timestamp_match:
timestamp = timestamp_match.group(1)
# Convert timestamp to seconds since the epoch (or use your own preferred time format)
timestamp = pd.to_datetime(timestamp)
# Set start time if it's the first timestamp
if start_time is None:
start_time = timestamp
# Compute relative time from start time
relative_time = (timestamp - start_time).total_seconds()
# GC Timing (line graph)
if 'Pre Evacuate Collection Set' in line:
gc_timing.append(('Pre Evacuate', relative_time, float(line.split(":")[-1].strip().replace('ms', ''))))
elif 'Evacuate Collection Set' in line:
gc_timing.append(('Evacuate', relative_time, float(line.split(":")[-1].strip().replace('ms', ''))))
elif 'Post Evacuate Collection Set' in line:
gc_timing.append(('Post Evacuate', relative_time, float(line.split(":")[-1].strip().replace('ms', ''))))
elif 'Other' in line:
gc_timing.append(('Other', relative_time, float(line.split(":")[-1].strip().replace('ms', ''))))
# Heap Regions (scatter plot)
if 'Eden regions' in line:
try:
eden = line.split("regions:")[1].split("->")
heap_regions['Eden'].append((relative_time, int(eden[1].split("(")[0])))
except IndexError:
pass
if 'Survivor regions' in line:
try:
survivor = line.split("regions:")[1].split("->")
heap_regions['Survivor'].append((relative_time, int(survivor[1].split("(")[0])))
except IndexError:
pass
if 'Old regions' in line:
try:
old = line.split("regions:")[1].split("->")
heap_regions['Old'].append((relative_time, int(old[1])))
except IndexError:
pass
if 'Humongous regions' in line:
try:
humongous = line.split("regions:")[1].split("->")
heap_regions['Humongous'].append((relative_time, int(humongous[1])))
except IndexError:
pass
# Metaspace (scatter plot)
if 'Metaspace' in line:
metaspace_vals = line.split("->")[1].split('K')
metaspace['Metaspace'].append((relative_time, int(metaspace_vals[0].strip())))
metaspace['NonClass'].append((relative_time, int(line.split('NonClass:')[1].split('K')[0].strip())))
metaspace['Class'].append((relative_time, int(line.split('Class:')[1].split('K')[0].strip())))
# Pause details (line graph)
if 'Pause Young' in line:
# Extracting memory values and time
parts = line.split()
try:
before_gc = int(parts[3].split('M')[0])
after_gc = int(parts[4].split('M')[0])
time_taken = float(parts[-2].replace('ms', ''))
pause_details.append((relative_time, before_gc, after_gc, time_taken))
except ValueError:
pass
# System details (system usage)
if 'User=' in line:
# Clean up extra spaces and split correctly
sys_data = line.split(')')[1].strip()
user_t, sys_t, real_t = [t.split('=')[1][:-1] for t in sys_data.split(' ')]
system_details.append((relative_time, float(user_t), float(sys_t), float(real_t)))
return gc_timing, heap_regions, metaspace, pause_details, system_details
def plot_gc_logs(gc_log):
# Parse the GC log
gc_timing, heap_regions, metaspace, pause_details, system_details = parse_gc_log(gc_log)
# Normalize the GC timing (subtract median of GC timing)
gc_timing_df = pd.DataFrame(gc_timing, columns=['Event', 'Time(s)', 'Time(ms)'])
median_gc_time = gc_timing_df['Time(ms)'].median() # Calculate the median of GC timings
# Only normalize and plot if the median is not zero
if median_gc_time != 0:
gc_timing_df['Normalized Time(ms)'] = gc_timing_df['Time(ms)'] - median_gc_time # Subtract median
# Create subplots with 2x2 grid for top and system plot taking the entire width at the bottom
fig = plt.figure(figsize=(15, 15))
# Create 2x2 grid for the top 4 plots and one subplot for the system details
grid_spec = fig.add_gridspec(3, 2, height_ratios=[1, 1, 2]) # Define 2x2 grid for the top 4 plots and the bottom for system details
# 1. GC Timing - Line Graph (Normalized)
ax1 = fig.add_subplot(grid_spec[0, 0])
ax1.plot(gc_timing_df['Time(s)'], gc_timing_df['Normalized Time(ms)'], marker='o')
ax1.set_title("Normalized GC Timing")
ax1.set_xlabel('Relative Time (s)')
ax1.set_ylabel('Normalized Time (ms)')
# 2. Heap Regions - Scatter Plot
ax2 = fig.add_subplot(grid_spec[0, 1])
for region in heap_regions:
x, y = zip(*heap_regions[region])
ax2.scatter(x, y, label=region) # Use relative time as x-axis
ax2.set_title("Heap Regions")
ax2.set_xlabel('Relative Time (s)')
ax2.set_ylabel('After GC')
ax2.legend()
# 3. Metaspace - Scatter Plot
ax3 = fig.add_subplot(grid_spec[1, 0])
metaspace_df = pd.DataFrame(metaspace)
ax3.scatter(metaspace_df['Metaspace'].apply(lambda x: x[0]), metaspace_df['Metaspace'].apply(lambda x: x[1]), label="Metaspace")
ax3.scatter(metaspace_df['NonClass'].apply(lambda x: x[0]), metaspace_df['NonClass'].apply(lambda x: x[1]), label="NonClass")
ax3.scatter(metaspace_df['Class'].apply(lambda x: x[0]), metaspace_df['Class'].apply(lambda x: x[1]), label="Class")
ax3.set_title("Metaspace")
ax3.set_xlabel('Relative Time (s)')
ax3.set_ylabel('Memory (K)')
ax3.legend()
# 4. Pause Details - Line Graph
ax4 = fig.add_subplot(grid_spec[1, 1])
pause_details_df = pd.DataFrame(pause_details, columns=['Time(s)', 'Before GC', 'After GC', 'Time(ms)'])
ax4.plot(pause_details_df['Time(s)'], pause_details_df['Before GC'], label="Before GC", linestyle='--', marker='o')
ax4.plot(pause_details_df['Time(s)'], pause_details_df['After GC'], label="After GC", linestyle='-', marker='o')
ax4.set_title("Pause Details")
ax4.set_xlabel('Relative Time (s)')
ax4.set_ylabel('Memory (M) / Time (ms)')
ax4.twinx().plot(pause_details_df['Time(s)'], pause_details_df['Time(ms)'], label="Time Taken", color='r', linestyle=':', marker='x')
ax4.legend(loc="upper left")
# 5. System - Line Graph (Full Width)
ax5 = fig.add_subplot(grid_spec[2, :]) # Span entire width
system_df = pd.DataFrame(system_details, columns=['Time(s)', 'User(s)', 'Sys(s)', 'Real(s)'])
ax5.plot(system_df['Time(s)'], system_df['User(s)'], label="User")
ax5.plot(system_df['Time(s)'], system_df['Sys(s)'], label="Sys")
ax5.plot(system_df['Time(s)'], system_df['Real(s)'], label="Real")
ax5.set_title("System Details")
ax5.set_xlabel('Relative Time (s)')
ax5.set_ylabel('Time (s)')
ax5.legend()
# Adjust layout
plt.tight_layout()
plt.show()
else:
print("The median of GC timing is zero. Skipping the normalized GC timing plot.")
import sys
plot_gc_logs(open(sys.argv[1]).read())
@djinn
Copy link
Author

djinn commented Mar 26, 2025

python gcplot.py gc.log

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment