Skip to content

Instantly share code, notes, and snippets.

@djinn
Created March 26, 2025 11:35
Show Gist options
  • Select an option

  • Save djinn/d617973c08f743a11eda421d23ae2a1c to your computer and use it in GitHub Desktop.

Select an option

Save djinn/d617973c08f743a11eda421d23ae2a1c to your computer and use it in GitHub Desktop.
Visualizing the GC logs for G1 activities
import matplotlib.pyplot as plt
import pandas as pd
import re
def parse_gc_log(gc_log):
# Split the input log by lines
lines = gc_log.split("\n")
# Prepare lists for each graph's data
gc_timing = []
heap_regions = {'Eden': [], 'Survivor': [], 'Old': [], 'Humongous': []}
metaspace = {'Metaspace': [], 'NonClass': [], 'Class': []}
pause_details = []
system_details = []
start_time = None # To store the timestamp of the first event
# Parse each line in the log
for line in lines:
if 'GC' in line:
# Extract timestamp if available (example pattern: [2025-03-25 10:15:32])
timestamp_match = re.search(r'\[(.*?)\]', line)
if timestamp_match:
timestamp = timestamp_match.group(1)
# Convert timestamp to seconds since the epoch (or use your own preferred time format)
timestamp = pd.to_datetime(timestamp)
# Set start time if it's the first timestamp
if start_time is None:
start_time = timestamp
# Compute relative time from start time
relative_time = (timestamp - start_time).total_seconds()
# GC Timing (line graph)
if 'Pre Evacuate Collection Set' in line:
gc_timing.append(('Pre Evacuate', relative_time, float(line.split(":")[-1].strip().replace('ms', ''))))
elif 'Evacuate Collection Set' in line:
gc_timing.append(('Evacuate', relative_time, float(line.split(":")[-1].strip().replace('ms', ''))))
elif 'Post Evacuate Collection Set' in line:
gc_timing.append(('Post Evacuate', relative_time, float(line.split(":")[-1].strip().replace('ms', ''))))
elif 'Other' in line:
gc_timing.append(('Other', relative_time, float(line.split(":")[-1].strip().replace('ms', ''))))
# Heap Regions (scatter plot)
if 'Eden regions' in line:
try:
eden = line.split("regions:")[1].split("->")
heap_regions['Eden'].append((relative_time, int(eden[1].split("(")[0])))
except IndexError:
pass
if 'Survivor regions' in line:
try:
survivor = line.split("regions:")[1].split("->")
heap_regions['Survivor'].append((relative_time, int(survivor[1].split("(")[0])))
except IndexError:
pass
if 'Old regions' in line:
try:
old = line.split("regions:")[1].split("->")
heap_regions['Old'].append((relative_time, int(old[1])))
except IndexError:
pass
if 'Humongous regions' in line:
try:
humongous = line.split("regions:")[1].split("->")
heap_regions['Humongous'].append((relative_time, int(humongous[1])))
except IndexError:
pass
# Metaspace (scatter plot)
if 'Metaspace' in line:
metaspace_vals = line.split("->")[1].split('K')
metaspace['Metaspace'].append((relative_time, int(metaspace_vals[0].strip())))
metaspace['NonClass'].append((relative_time, int(line.split('NonClass:')[1].split('K')[0].strip())))
metaspace['Class'].append((relative_time, int(line.split('Class:')[1].split('K')[0].strip())))
# Pause details (line graph)
if 'Pause Young' in line:
# Extracting memory values and time
parts = line.split()
try:
before_gc = int(parts[3].split('M')[0])
after_gc = int(parts[4].split('M')[0])
time_taken = float(parts[-2].replace('ms', ''))
pause_details.append((relative_time, before_gc, after_gc, time_taken))
except ValueError:
pass
# System details (system usage)
if 'User=' in line:
# Clean up extra spaces and split correctly
sys_data = line.split(')')[1].strip()
user_t, sys_t, real_t = [t.split('=')[1][:-1] for t in sys_data.split(' ')]
system_details.append((relative_time, float(user_t), float(sys_t), float(real_t)))
return gc_timing, heap_regions, metaspace, pause_details, system_details
def plot_gc_logs(gc_log):
# Parse the GC log
gc_timing, heap_regions, metaspace, pause_details, system_details = parse_gc_log(gc_log)
# Normalize the GC timing (subtract median of GC timing)
gc_timing_df = pd.DataFrame(gc_timing, columns=['Event', 'Time(s)', 'Time(ms)'])
median_gc_time = gc_timing_df['Time(ms)'].median() # Calculate the median of GC timings
# Only normalize and plot if the median is not zero
if median_gc_time != 0:
gc_timing_df['Normalized Time(ms)'] = gc_timing_df['Time(ms)'] - median_gc_time # Subtract median
# Create subplots with 2x2 grid for top and system plot taking the entire width at the bottom
fig = plt.figure(figsize=(15, 15))
# Create 2x2 grid for the top 4 plots and one subplot for the system details
grid_spec = fig.add_gridspec(3, 2, height_ratios=[1, 1, 2]) # Define 2x2 grid for the top 4 plots and the bottom for system details
# 1. GC Timing - Line Graph (Normalized)
ax1 = fig.add_subplot(grid_spec[0, 0])
ax1.plot(gc_timing_df['Time(s)'], gc_timing_df['Normalized Time(ms)'], marker='o')
ax1.set_title("Normalized GC Timing")
ax1.set_xlabel('Relative Time (s)')
ax1.set_ylabel('Normalized Time (ms)')
# 2. Heap Regions - Scatter Plot
ax2 = fig.add_subplot(grid_spec[0, 1])
for region in heap_regions:
x, y = zip(*heap_regions[region])
ax2.scatter(x, y, label=region) # Use relative time as x-axis
ax2.set_title("Heap Regions")
ax2.set_xlabel('Relative Time (s)')
ax2.set_ylabel('After GC')
ax2.legend()
# 3. Metaspace - Scatter Plot
ax3 = fig.add_subplot(grid_spec[1, 0])
metaspace_df = pd.DataFrame(metaspace)
ax3.scatter(metaspace_df['Metaspace'].apply(lambda x: x[0]), metaspace_df['Metaspace'].apply(lambda x: x[1]), label="Metaspace")
ax3.scatter(metaspace_df['NonClass'].apply(lambda x: x[0]), metaspace_df['NonClass'].apply(lambda x: x[1]), label="NonClass")
ax3.scatter(metaspace_df['Class'].apply(lambda x: x[0]), metaspace_df['Class'].apply(lambda x: x[1]), label="Class")
ax3.set_title("Metaspace")
ax3.set_xlabel('Relative Time (s)')
ax3.set_ylabel('Memory (K)')
ax3.legend()
# 4. Pause Details - Line Graph
ax4 = fig.add_subplot(grid_spec[1, 1])
pause_details_df = pd.DataFrame(pause_details, columns=['Time(s)', 'Before GC', 'After GC', 'Time(ms)'])
ax4.plot(pause_details_df['Time(s)'], pause_details_df['Before GC'], label="Before GC", linestyle='--', marker='o')
ax4.plot(pause_details_df['Time(s)'], pause_details_df['After GC'], label="After GC", linestyle='-', marker='o')
ax4.set_title("Pause Details")
ax4.set_xlabel('Relative Time (s)')
ax4.set_ylabel('Memory (M) / Time (ms)')
ax4.twinx().plot(pause_details_df['Time(s)'], pause_details_df['Time(ms)'], label="Time Taken", color='r', linestyle=':', marker='x')
ax4.legend(loc="upper left")
# 5. System - Line Graph (Full Width)
ax5 = fig.add_subplot(grid_spec[2, :]) # Span entire width
system_df = pd.DataFrame(system_details, columns=['Time(s)', 'User(s)', 'Sys(s)', 'Real(s)'])
ax5.plot(system_df['Time(s)'], system_df['User(s)'], label="User")
ax5.plot(system_df['Time(s)'], system_df['Sys(s)'], label="Sys")
ax5.plot(system_df['Time(s)'], system_df['Real(s)'], label="Real")
ax5.set_title("System Details")
ax5.set_xlabel('Relative Time (s)')
ax5.set_ylabel('Time (s)')
ax5.legend()
# Adjust layout
plt.tight_layout()
plt.show()
else:
print("The median of GC timing is zero. Skipping the normalized GC timing plot.")
import sys
plot_gc_logs(open(sys.argv[1]).read())
@djinn
Copy link
Copy Markdown
Author

djinn commented Mar 26, 2025

python gcplot.py gc.log

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment