Created
March 26, 2025 11:35
-
-
Save djinn/d617973c08f743a11eda421d23ae2a1c to your computer and use it in GitHub Desktop.
Visualizing the GC logs for G1 activities
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import matplotlib.pyplot as plt | |
import pandas as pd | |
import re | |
def parse_gc_log(gc_log): | |
# Split the input log by lines | |
lines = gc_log.split("\n") | |
# Prepare lists for each graph's data | |
gc_timing = [] | |
heap_regions = {'Eden': [], 'Survivor': [], 'Old': [], 'Humongous': []} | |
metaspace = {'Metaspace': [], 'NonClass': [], 'Class': []} | |
pause_details = [] | |
system_details = [] | |
start_time = None # To store the timestamp of the first event | |
# Parse each line in the log | |
for line in lines: | |
if 'GC' in line: | |
# Extract timestamp if available (example pattern: [2025-03-25 10:15:32]) | |
timestamp_match = re.search(r'\[(.*?)\]', line) | |
if timestamp_match: | |
timestamp = timestamp_match.group(1) | |
# Convert timestamp to seconds since the epoch (or use your own preferred time format) | |
timestamp = pd.to_datetime(timestamp) | |
# Set start time if it's the first timestamp | |
if start_time is None: | |
start_time = timestamp | |
# Compute relative time from start time | |
relative_time = (timestamp - start_time).total_seconds() | |
# GC Timing (line graph) | |
if 'Pre Evacuate Collection Set' in line: | |
gc_timing.append(('Pre Evacuate', relative_time, float(line.split(":")[-1].strip().replace('ms', '')))) | |
elif 'Evacuate Collection Set' in line: | |
gc_timing.append(('Evacuate', relative_time, float(line.split(":")[-1].strip().replace('ms', '')))) | |
elif 'Post Evacuate Collection Set' in line: | |
gc_timing.append(('Post Evacuate', relative_time, float(line.split(":")[-1].strip().replace('ms', '')))) | |
elif 'Other' in line: | |
gc_timing.append(('Other', relative_time, float(line.split(":")[-1].strip().replace('ms', '')))) | |
# Heap Regions (scatter plot) | |
if 'Eden regions' in line: | |
try: | |
eden = line.split("regions:")[1].split("->") | |
heap_regions['Eden'].append((relative_time, int(eden[1].split("(")[0]))) | |
except IndexError: | |
pass | |
if 'Survivor regions' in line: | |
try: | |
survivor = line.split("regions:")[1].split("->") | |
heap_regions['Survivor'].append((relative_time, int(survivor[1].split("(")[0]))) | |
except IndexError: | |
pass | |
if 'Old regions' in line: | |
try: | |
old = line.split("regions:")[1].split("->") | |
heap_regions['Old'].append((relative_time, int(old[1]))) | |
except IndexError: | |
pass | |
if 'Humongous regions' in line: | |
try: | |
humongous = line.split("regions:")[1].split("->") | |
heap_regions['Humongous'].append((relative_time, int(humongous[1]))) | |
except IndexError: | |
pass | |
# Metaspace (scatter plot) | |
if 'Metaspace' in line: | |
metaspace_vals = line.split("->")[1].split('K') | |
metaspace['Metaspace'].append((relative_time, int(metaspace_vals[0].strip()))) | |
metaspace['NonClass'].append((relative_time, int(line.split('NonClass:')[1].split('K')[0].strip()))) | |
metaspace['Class'].append((relative_time, int(line.split('Class:')[1].split('K')[0].strip()))) | |
# Pause details (line graph) | |
if 'Pause Young' in line: | |
# Extracting memory values and time | |
parts = line.split() | |
try: | |
before_gc = int(parts[3].split('M')[0]) | |
after_gc = int(parts[4].split('M')[0]) | |
time_taken = float(parts[-2].replace('ms', '')) | |
pause_details.append((relative_time, before_gc, after_gc, time_taken)) | |
except ValueError: | |
pass | |
# System details (system usage) | |
if 'User=' in line: | |
# Clean up extra spaces and split correctly | |
sys_data = line.split(')')[1].strip() | |
user_t, sys_t, real_t = [t.split('=')[1][:-1] for t in sys_data.split(' ')] | |
system_details.append((relative_time, float(user_t), float(sys_t), float(real_t))) | |
return gc_timing, heap_regions, metaspace, pause_details, system_details | |
def plot_gc_logs(gc_log): | |
# Parse the GC log | |
gc_timing, heap_regions, metaspace, pause_details, system_details = parse_gc_log(gc_log) | |
# Normalize the GC timing (subtract median of GC timing) | |
gc_timing_df = pd.DataFrame(gc_timing, columns=['Event', 'Time(s)', 'Time(ms)']) | |
median_gc_time = gc_timing_df['Time(ms)'].median() # Calculate the median of GC timings | |
# Only normalize and plot if the median is not zero | |
if median_gc_time != 0: | |
gc_timing_df['Normalized Time(ms)'] = gc_timing_df['Time(ms)'] - median_gc_time # Subtract median | |
# Create subplots with 2x2 grid for top and system plot taking the entire width at the bottom | |
fig = plt.figure(figsize=(15, 15)) | |
# Create 2x2 grid for the top 4 plots and one subplot for the system details | |
grid_spec = fig.add_gridspec(3, 2, height_ratios=[1, 1, 2]) # Define 2x2 grid for the top 4 plots and the bottom for system details | |
# 1. GC Timing - Line Graph (Normalized) | |
ax1 = fig.add_subplot(grid_spec[0, 0]) | |
ax1.plot(gc_timing_df['Time(s)'], gc_timing_df['Normalized Time(ms)'], marker='o') | |
ax1.set_title("Normalized GC Timing") | |
ax1.set_xlabel('Relative Time (s)') | |
ax1.set_ylabel('Normalized Time (ms)') | |
# 2. Heap Regions - Scatter Plot | |
ax2 = fig.add_subplot(grid_spec[0, 1]) | |
for region in heap_regions: | |
x, y = zip(*heap_regions[region]) | |
ax2.scatter(x, y, label=region) # Use relative time as x-axis | |
ax2.set_title("Heap Regions") | |
ax2.set_xlabel('Relative Time (s)') | |
ax2.set_ylabel('After GC') | |
ax2.legend() | |
# 3. Metaspace - Scatter Plot | |
ax3 = fig.add_subplot(grid_spec[1, 0]) | |
metaspace_df = pd.DataFrame(metaspace) | |
ax3.scatter(metaspace_df['Metaspace'].apply(lambda x: x[0]), metaspace_df['Metaspace'].apply(lambda x: x[1]), label="Metaspace") | |
ax3.scatter(metaspace_df['NonClass'].apply(lambda x: x[0]), metaspace_df['NonClass'].apply(lambda x: x[1]), label="NonClass") | |
ax3.scatter(metaspace_df['Class'].apply(lambda x: x[0]), metaspace_df['Class'].apply(lambda x: x[1]), label="Class") | |
ax3.set_title("Metaspace") | |
ax3.set_xlabel('Relative Time (s)') | |
ax3.set_ylabel('Memory (K)') | |
ax3.legend() | |
# 4. Pause Details - Line Graph | |
ax4 = fig.add_subplot(grid_spec[1, 1]) | |
pause_details_df = pd.DataFrame(pause_details, columns=['Time(s)', 'Before GC', 'After GC', 'Time(ms)']) | |
ax4.plot(pause_details_df['Time(s)'], pause_details_df['Before GC'], label="Before GC", linestyle='--', marker='o') | |
ax4.plot(pause_details_df['Time(s)'], pause_details_df['After GC'], label="After GC", linestyle='-', marker='o') | |
ax4.set_title("Pause Details") | |
ax4.set_xlabel('Relative Time (s)') | |
ax4.set_ylabel('Memory (M) / Time (ms)') | |
ax4.twinx().plot(pause_details_df['Time(s)'], pause_details_df['Time(ms)'], label="Time Taken", color='r', linestyle=':', marker='x') | |
ax4.legend(loc="upper left") | |
# 5. System - Line Graph (Full Width) | |
ax5 = fig.add_subplot(grid_spec[2, :]) # Span entire width | |
system_df = pd.DataFrame(system_details, columns=['Time(s)', 'User(s)', 'Sys(s)', 'Real(s)']) | |
ax5.plot(system_df['Time(s)'], system_df['User(s)'], label="User") | |
ax5.plot(system_df['Time(s)'], system_df['Sys(s)'], label="Sys") | |
ax5.plot(system_df['Time(s)'], system_df['Real(s)'], label="Real") | |
ax5.set_title("System Details") | |
ax5.set_xlabel('Relative Time (s)') | |
ax5.set_ylabel('Time (s)') | |
ax5.legend() | |
# Adjust layout | |
plt.tight_layout() | |
plt.show() | |
else: | |
print("The median of GC timing is zero. Skipping the normalized GC timing plot.") | |
import sys | |
plot_gc_logs(open(sys.argv[1]).read()) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
python gcplot.py gc.log