Created
May 7, 2025 20:21
-
-
Save greyhoundforty/0cead3155cc441fb726f83355528a656 to your computer and use it in GitHub Desktop.
RVtools Python Parser
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import pandas as pd | |
import glob | |
from typing import Dict, List, Optional, Union, Tuple | |
import statistics | |
import click | |
class RVToolsAnalyzer: | |
def __init__(self, directory_path: str = None): | |
""" | |
Initialize the RVTools analyzer with a directory containing RVTools CSV exports. | |
Args: | |
directory_path: Path to directory containing RVTools CSV files | |
""" | |
self.directory_path = directory_path | |
self.dataframes = {} | |
self.file_mapping = { | |
'vinfo': None, | |
'vhost': None, | |
'vcluster': None, | |
'vcpu': None, | |
'vmemory': None, | |
'vnetwork': None, | |
'vdisk': None | |
} | |
if directory_path: | |
self.load_csv_files() | |
def load_csv_files(self) -> None:
    """
    Locate the known RVTools CSV exports in self.directory_path and load them.

    Each file that exists is recorded in self.file_mapping and then read into
    self.dataframes with its column headers normalised (surrounding whitespace
    stripped, spaces -> '_', parentheses removed, '%' -> 'pct', '#' -> 'num').
    Progress, warnings and per-file load errors are reported with print().

    Raises:
        ValueError: if self.directory_path is empty or does not exist.
    """
    base_dir = self.directory_path
    if not (base_dir and os.path.exists(base_dir)):
        raise ValueError(f"Directory path {self.directory_path} does not exist")

    # Short tab key -> exact RVTools export filename to look for.
    expected_exports = (
        ('vinfo', 'RVTools_tabvInfo.csv'),
        ('vhost', 'RVTools_tabvHost.csv'),
        ('vcluster', 'RVTools_tabvCluster.csv'),
        ('vcpu', 'RVTools_tabvCPU.csv'),
        ('vmemory', 'RVTools_tabvMemory.csv'),
        ('vnetwork', 'RVTools_tabvNetwork.csv'),
        ('vdisk', 'RVTools_tabvDisk.csv'),
    )

    # Pass 1: record which of the expected files are actually present.
    for tab, csv_name in expected_exports:
        candidate = os.path.join(base_dir, csv_name)
        if not os.path.exists(candidate):
            print(f"Warning: {csv_name} not found in {self.directory_path}")
            continue
        self.file_mapping[tab] = candidate
        print(f"Found {tab} file: {csv_name}")

    # One-pass equivalent of the header clean-up; None deletes the character.
    header_cleanup = str.maketrans({' ': '_', '(': None, ')': None, '%': 'pct', '#': 'num'})

    # Pass 2: read every mapped file into a DataFrame.
    for tab, csv_path in self.file_mapping.items():
        if not csv_path or not os.path.exists(csv_path):
            print(f"File for {tab} not found or not valid")
            continue
        try:
            frame = pd.read_csv(csv_path, encoding='utf-8', low_memory=False)
            frame.columns = [header.strip().translate(header_cleanup) for header in frame.columns]
            self.dataframes[tab] = frame
            print(f"Loaded {tab} data from {os.path.basename(csv_path)} with {len(frame)} rows and {len(frame.columns)} columns")
            print(f"Sample columns: {', '.join(frame.columns[:5])}")
        except Exception as e:
            # Best effort: one unreadable file must not stop the remaining loads.
            print(f"Error loading {csv_path}: {e}")
def get_cluster_utilization(self) -> pd.DataFrame:
    """
    Extract the percentage-based CPU and memory columns from the vCluster data.

    The first column whose name contains 'cluster' or 'name' is used as the
    cluster identifier. Every column containing 'cpu' and 'pct', followed by
    every column containing 'mem' or 'ram' and 'pct', is added next to it
    (all matching is case-insensitive).

    Returns:
        DataFrame with the identifier column plus the matching 'pct' columns,
        or an empty DataFrame when no identifier column is found.

    Raises:
        ValueError: if no 'vcluster' DataFrame has been loaded.
    """
    if 'vcluster' not in self.dataframes:
        raise ValueError("vCluster data not found in loaded files")

    frame = self.dataframes['vcluster']
    named = [(label, label.lower()) for label in frame.columns]

    id_candidates = [label for label, low in named if 'cluster' in low or 'name' in low]
    if not id_candidates:
        # Nothing that could identify a cluster.
        return pd.DataFrame()

    cpu_pct = [label for label, low in named if 'cpu' in low and 'pct' in low]
    mem_pct = [label for label, low in named if ('mem' in low or 'ram' in low) and 'pct' in low]

    summary = frame[[id_candidates[0]]].copy()
    for label in cpu_pct + mem_pct:
        summary[label] = frame[label]
    return summary
def get_host_utilization(self) -> pd.DataFrame:
    """
    Extract host-level CPU and memory utilization columns from the vHost data.

    Column selection is by case-insensitive substring match:
      * host identifier: first column containing 'host', 'name' or 'esx'
      * cluster: first column containing 'cluster' (optional)
      * CPU: columns containing 'cpu' and one of 'usage', 'util', 'pct'
      * memory: columns containing 'mem' or 'ram' and one of 'usage', 'util', 'pct'

    Returns:
        DataFrame with the selected columns (each column at most once), or an
        empty DataFrame when no host identifier column is found.

    Raises:
        ValueError: if no 'vhost' DataFrame has been loaded.
    """
    if 'vhost' not in self.dataframes:
        raise ValueError("vHost data not found in loaded files")

    df = self.dataframes['vhost']
    named = [(label, label.lower()) for label in df.columns]

    def is_utilization(low):
        return 'usage' in low or 'util' in low or 'pct' in low

    host_cols = [label for label, low in named if 'host' in low or 'name' in low or 'esx' in low]
    if not host_cols:
        return pd.DataFrame()

    cluster_cols = [label for label, low in named if 'cluster' in low]
    cpu_cols = [label for label, low in named if 'cpu' in low and is_utilization(low)]
    mem_cols = [label for label, low in named
                if ('mem' in low or 'ram' in low) and is_utilization(low)]

    # A single column can satisfy more than one filter (e.g. 'Cluster_name'
    # matches both the host and the cluster test). dict.fromkeys keeps the first
    # occurrence only, so the result never carries duplicate column labels.
    selected = list(dict.fromkeys([host_cols[0], *cluster_cols[:1], *cpu_cols, *mem_cols]))
    return df[selected].copy()
def get_vm_cpu_utilization(self) -> pd.DataFrame:
    """
    Select the VM, cluster, host and CPU columns from the vInfo data.

    Exact column names ('VM', 'Cluster', 'Host', 'CPUs' and
    'Overall_Cpu_Readiness') are preferred. When an exact name is absent the
    method falls back to case-insensitive substring matching:
      * VM: columns containing 'vm' but not 'name', else columns containing
        'name' but not 'dns' (only the first match is used)
      * cluster / host: all columns containing 'cluster' / 'host'
      * CPU: all columns containing 'cpu' but not 'num'

    Returns:
        DataFrame with the selected columns (each column at most once), or an
        empty DataFrame when vInfo is not loaded or no VM column is found.
    """
    if 'vinfo' not in self.dataframes:
        print("vInfo data not found in loaded files")
        return pd.DataFrame()

    df = self.dataframes['vinfo']
    labels = list(df.columns)

    def exact_or_matching(exact, predicate):
        # Prefer the exact column name; otherwise every column passing predicate.
        if exact in df.columns:
            return [exact]
        return [label for label in labels if predicate(label.lower())]

    vm_cols = exact_or_matching('VM', lambda low: 'vm' in low and 'name' not in low)
    if not vm_cols:
        vm_cols = [label for label in labels
                   if 'name' in label.lower() and 'dns' not in label.lower()]
    if not vm_cols:
        return pd.DataFrame()

    if 'CPUs' in df.columns:
        cpu_cols = ['CPUs', 'Overall_Cpu_Readiness']
    else:
        cpu_cols = [label for label in labels
                    if 'cpu' in label.lower() and 'num' not in label.lower()]

    cluster_cols = exact_or_matching('Cluster', lambda low: 'cluster' in low)
    host_cols = exact_or_matching('Host', lambda low: 'host' in low)

    # Fallback matching can pick the same column for several roles (e.g.
    # 'VM_Host' as both VM and host column). Keep the first occurrence only so
    # the result has no duplicate labels, and drop names that are absent from
    # the frame (such as a missing 'Overall_Cpu_Readiness').
    candidates = [vm_cols[0], *cluster_cols, *host_cols, *cpu_cols]
    result_cols = [label for label in dict.fromkeys(candidates) if label in df.columns]

    print(f"Using columns for VM CPU utilization: {', '.join(result_cols)}")
    return df[result_cols].copy()
def get_vm_memory_utilization(self) -> pd.DataFrame:
    """
    Select the VM, cluster, host and memory columns from the vInfo data.

    NOTE: this class body defines another method with this same name further
    down; the later definition replaces this one when the class is created.

    Exact column names ('VM', 'Cluster', 'Host', 'Memory' and 'Active_Memory')
    are preferred. When an exact name is absent the method falls back to
    case-insensitive substring matching:
      * VM: columns containing 'vm' but not 'name', else columns containing
        'name' but not 'dns' (only the first match is used)
      * cluster / host: all columns containing 'cluster' / 'host'
      * memory: all columns containing 'mem' or 'ram'

    Returns:
        DataFrame with the selected columns (each column at most once), or an
        empty DataFrame when vInfo is not loaded or no VM column is found.
    """
    if 'vinfo' not in self.dataframes:
        print("vInfo data not found in loaded files")
        return pd.DataFrame()

    df = self.dataframes['vinfo']
    labels = list(df.columns)

    def exact_or_matching(exact, predicate):
        # Prefer the exact column name; otherwise every column passing predicate.
        if exact in df.columns:
            return [exact]
        return [label for label in labels if predicate(label.lower())]

    vm_cols = exact_or_matching('VM', lambda low: 'vm' in low and 'name' not in low)
    if not vm_cols:
        vm_cols = [label for label in labels
                   if 'name' in label.lower() and 'dns' not in label.lower()]
    if not vm_cols:
        return pd.DataFrame()

    if 'Memory' in df.columns:
        mem_cols = ['Memory', 'Active_Memory']
    else:
        mem_cols = [label for label in labels
                    if 'mem' in label.lower() or 'ram' in label.lower()]

    cluster_cols = exact_or_matching('Cluster', lambda low: 'cluster' in low)
    host_cols = exact_or_matching('Host', lambda low: 'host' in low)

    # Fallback matching can pick the same column for several roles; keep the
    # first occurrence only so the result has no duplicate labels, and drop
    # names that are absent from the frame (such as a missing 'Active_Memory').
    candidates = [vm_cols[0], *cluster_cols, *host_cols, *mem_cols]
    result_cols = [label for label in dict.fromkeys(candidates) if label in df.columns]

    print(f"Using columns for VM memory utilization: {', '.join(result_cols)}")
    return df[result_cols].copy()
def print_aggregated_vm_report(self, top_n: int = 10, by: str = 'cpu') -> None:
    """
    Print a formatted VM utilization report, showing the top N VMs by utilization.

    NOTE: this class body defines another method with this same name further
    down; the later definition replaces this one when the class is created.

    The CPU frame comes from get_vm_cpu_utilization() and the memory frame from
    get_vm_memory_utilization(). When vInfo is loaded but vCPU or vMemory is
    not, both getters are called unconditionally; otherwise each getter is only
    called if its own tab ('vcpu' / 'vmemory') is loaded.

    For each frame the first column is taken as the VM name and the first
    matching metric column is the ranking value; missing values rank as 0.
    VMs are listed per cluster and per host.

    Args:
        top_n: Number of top VMs to show
        by: Metric to sort by ('cpu' or 'memory')
    """
    frames = self.dataframes
    if 'vinfo' in frames and ('vcpu' not in frames or 'vmemory' not in frames):
        print("Using vInfo data for VM utilization metrics")
        vm_cpu_df = self.get_vm_cpu_utilization()
        vm_memory_df = self.get_vm_memory_utilization()
    else:
        vm_cpu_df = self.get_vm_cpu_utilization() if 'vcpu' in frames else None
        vm_memory_df = self.get_vm_memory_utilization() if 'vmemory' in frames else None

    requested = by.lower()

    def has_rows(frame):
        return frame is not None and not frame.empty

    def print_top(frame, vm_col, value_col, group_col, group_label, heading, unit, emit):
        # Announce the grouping column, then list the top VMs of every group.
        print(f"Grouping VMs by {group_label.lower()} using column: {group_col}")
        if not emit:
            # Nothing would be printed per group, so skip the ranking work.
            return
        for group in frame[group_col].dropna().unique():
            members = frame[frame[group_col] == group]
            ranked = [
                (vm_name, value if pd.notna(value) else 0)
                for vm_name, value in zip(members[vm_col], members[value_col])
            ]
            # list.sort is stable, so ties keep their original row order.
            ranked.sort(key=lambda pair: pair[1], reverse=True)
            top_vms = ranked[:top_n]
            if top_vms:
                print(f"\n{group_label}: {group}")
                print(f" Top {len(top_vms)} VMs by {heading}:")
                for vm_name, value in top_vms:
                    print(f" {vm_name}: {value} {unit}")

    def print_section(frame, is_metric, heading, unit, emit):
        # Resolve the columns of one frame and report it by cluster, then by host.
        vm_col = frame.columns[0]  # Assume first column is VM name
        cluster_cols = [col for col in frame.columns if 'cluster' in col.lower()]
        host_cols = [col for col in frame.columns if 'host' in col.lower()]
        value_cols = [col for col in frame.columns if is_metric(col.lower()) and col != vm_col]
        if not (vm_col and value_cols):
            return
        if cluster_cols:
            print_top(frame, vm_col, value_cols[0], cluster_cols[0], "Cluster", heading, unit, emit)
        if host_cols:
            print_top(frame, vm_col, value_cols[0], host_cols[0], "Host", heading, unit, emit)

    # The CPU section always announces its grouping columns, but only lists VMs
    # when the report was requested by CPU.
    if has_rows(vm_cpu_df):
        print_section(vm_cpu_df, lambda low: 'cpu' in low, "CPU", "CPUs", requested == 'cpu')

    # The memory section is skipped entirely unless requested by memory.
    if has_rows(vm_memory_df) and requested == 'memory':
        print_section(vm_memory_df, lambda low: 'mem' in low or 'ram' in low, "Memory", "MB", True)

    if not has_rows(vm_cpu_df) and not has_rows(vm_memory_df):
        print("No VM data available.")
def get_vm_memory_utilization(self) -> pd.DataFrame:
    """
    Extract VM memory columns from the vMemory data.

    The VM identifier is the first column containing both 'vm' and 'name', or
    failing that the first column containing 'name'. Among the columns
    containing 'mem' or 'ram', those also containing 'usage', 'util' or 'pct'
    are added first, then those containing 'size', 'total' or 'capacity'.
    Cluster columns ('cluster') and host columns ('host' or 'esx') follow.
    All matching is case-insensitive.

    Returns:
        DataFrame with the identifier and the matching columns, or an empty
        DataFrame when no identifier column is found.

    Raises:
        ValueError: if no 'vmemory' DataFrame has been loaded.
    """
    if 'vmemory' not in self.dataframes:
        raise ValueError("vMemory data not found in loaded files")

    frame = self.dataframes['vmemory']
    named = [(label, label.lower()) for label in frame.columns]

    def matching(predicate):
        return [label for label, low in named if predicate(low)]

    def mentions(low, words):
        return any(word in low for word in words)

    id_cols = matching(lambda low: 'vm' in low and 'name' in low)
    if not id_cols:
        id_cols = matching(lambda low: 'name' in low)
    if not id_cols:
        return pd.DataFrame()

    utilization = matching(
        lambda low: mentions(low, ('mem', 'ram')) and mentions(low, ('usage', 'util', 'pct'))
    )
    sizing = matching(
        lambda low: mentions(low, ('mem', 'ram')) and mentions(low, ('size', 'total', 'capacity'))
    )
    placement = matching(lambda low: 'cluster' in low) + matching(lambda low: mentions(low, ('host', 'esx')))

    summary = frame[[id_cols[0]]].copy()
    # Memory metrics first, then cluster/host placement.
    for label in utilization + sizing + placement:
        summary[label] = frame[label]
    return summary
def get_network_utilization(self) -> pd.DataFrame:
    """
    Extract VM network columns from the vNetwork data.

    The VM identifier is the first column containing both 'vm' and 'name', or
    failing that the first column containing 'name'. Columns containing
    'network', 'nic', 'net', 'adapter', 'connected' or 'mac' are added next,
    followed by cluster columns ('cluster') and host columns ('host' or 'esx').
    All matching is case-insensitive.

    Returns:
        DataFrame with the identifier and the matching columns, or an empty
        DataFrame when no identifier column is found.

    Raises:
        ValueError: if no 'vnetwork' DataFrame has been loaded.
    """
    if 'vnetwork' not in self.dataframes:
        raise ValueError("vNetwork data not found in loaded files")

    frame = self.dataframes['vnetwork']
    named = [(label, label.lower()) for label in frame.columns]

    def matching(*words):
        # Columns whose lowercase name contains at least one of the words.
        return [label for label, low in named if any(word in low for word in words)]

    id_cols = [label for label, low in named if 'vm' in low and 'name' in low]
    if not id_cols:
        id_cols = matching('name')
    if not id_cols:
        return pd.DataFrame()

    network = matching('network', 'nic', 'net', 'adapter', 'connected', 'mac')
    placement = matching('cluster') + matching('host', 'esx')

    summary = frame[[id_cols[0]]].copy()
    # Network metrics first, then cluster/host placement.
    for label in network + placement:
        summary[label] = frame[label]
    return summary
def get_aggregated_cluster_report(self) -> Dict:
    """
    Generate an aggregated report of cluster-level utilization.

    Cluster metrics come from get_cluster_utilization() (only when 'vcluster'
    is loaded) and per-host metrics from get_host_utilization() (only when
    'vhost' is loaded).

    Returns:
        Dictionary of the form
        {'clusters': {cluster: {'cpu': {...}, 'memory': {...},
                                'hosts': {host: {'cpu': {...}, 'memory': {...}}}}}}.
        The 'clusters' key is absent when neither source yields cluster data.
    """
    report = {}

    cluster_df = self.get_cluster_utilization() if 'vcluster' in self.dataframes else None
    host_df = self.get_host_utilization() if 'vhost' in self.dataframes else None

    def is_memory(low):
        return 'mem' in low or 'ram' in low

    def is_utilization(low):
        return 'usage' in low or 'util' in low or 'pct' in low

    if cluster_df is not None and not cluster_df.empty:
        cluster_col = cluster_df.columns[0]  # Assuming first column is cluster name
        # Column lists do not depend on the cluster, so resolve them once.
        cpu_cols = [col for col in cluster_df.columns
                    if 'cpu' in col.lower() and 'pct' in col.lower()]
        mem_cols = [col for col in cluster_df.columns
                    if is_memory(col.lower()) and 'pct' in col.lower()]

        clusters = report['clusters'] = {}
        for cluster in cluster_df[cluster_col].unique():
            rows = cluster_df[cluster_df[cluster_col] == cluster]
            # A NaN cluster name matches no rows and therefore gets empty metrics.
            found = not rows.empty
            clusters[cluster] = {
                'cpu': {col: rows[col].iloc[0] for col in cpu_cols} if found else {},
                'memory': {col: rows[col].iloc[0] for col in mem_cols} if found else {},
                'hosts': {},
            }

    if host_df is not None and not host_df.empty:
        cluster_cols = [col for col in host_df.columns if 'cluster' in col.lower()]
        if cluster_cols:
            cluster_col = cluster_cols[0]
            clusters = report.setdefault('clusters', {})

            cpu_cols = [col for col in host_df.columns
                        if 'cpu' in col.lower() and is_utilization(col.lower())]
            mem_cols = [col for col in host_df.columns
                        if is_memory(col.lower()) and is_utilization(col.lower())]
            host_cols = [col for col in host_df.columns
                         if 'host' in col.lower() or 'name' in col.lower() or 'esx' in col.lower()]
            # Without a host-like column there is nothing to key host details
            # on; the cluster entries are still created, with empty 'hosts'.
            host_col = host_cols[0] if host_cols else None

            for cluster in host_df[cluster_col].unique():
                if pd.isna(cluster):  # Skip NaN values
                    continue
                entry = clusters.setdefault(cluster, {'cpu': {}, 'memory': {}, 'hosts': {}})
                if host_col is None:
                    continue
                cluster_hosts = host_df[host_df[cluster_col] == cluster]
                for _, host_row in cluster_hosts.iterrows():
                    entry['hosts'][host_row[host_col]] = {
                        'cpu': {col: host_row[col] for col in cpu_cols if pd.notna(host_row[col])},
                        'memory': {col: host_row[col] for col in mem_cols if pd.notna(host_row[col])},
                    }

    return report
def get_aggregated_vm_report(self) -> Dict:
    """
    Generate an aggregated report of VM-level utilization.

    Three sources are merged in order: get_vm_cpu_utilization() (only when
    'vcpu' is loaded), get_vm_memory_utilization() (only when 'vmemory' is
    loaded) and get_network_utilization() (only when 'vnetwork' is loaded).
    In each source the VM column is the first column containing both 'vm' and
    'name', or failing that the first column containing 'name'; a source
    without such a column is skipped.

    Rows whose cluster/host value or VM name is missing (NaN, None or an empty
    string) are skipped, with the same rule applied to every source.

    Returns:
        Dictionary with VM utilization statistics grouped by clusters and hosts:
        {'vms_by_cluster': {cluster: {vm: {'cpu': {}, 'memory': {}, 'network': {}}}},
         'vms_by_host': {host: {vm: {'cpu': {}, 'memory': {}, 'network': {}}}}}
    """
    report = {'vms_by_cluster': {}, 'vms_by_host': {}}

    def mentions(low, words):
        return any(word in low for word in words)

    def is_missing(value):
        return pd.isna(value) or value == ''

    def merge(frame, section, is_metric):
        # Fold one source frame into report[...][group][vm][section].
        if frame is None or frame.empty:
            return
        labels = list(frame.columns)
        vm_cols = [col for col in labels if 'vm' in col.lower() and 'name' in col.lower()]
        if not vm_cols:
            vm_cols = [col for col in labels if 'name' in col.lower()]
        if not vm_cols:
            return
        vm_col = vm_cols[0]
        metric_cols = [col for col in labels if is_metric(col.lower())]
        groupings = (
            ('vms_by_cluster', [col for col in labels if 'cluster' in col.lower()]),
            ('vms_by_host', [col for col in labels if mentions(col.lower(), ('host', 'esx'))]),
        )
        for report_key, group_cols in groupings:
            if not group_cols:
                continue
            group_col = group_cols[0]
            for _, vm_row in frame.iterrows():
                group, vm_name = vm_row[group_col], vm_row[vm_col]
                if is_missing(group) or is_missing(vm_name):
                    continue
                vm_entry = report[report_key].setdefault(group, {}).setdefault(
                    vm_name, {'cpu': {}, 'memory': {}, 'network': {}}
                )
                for col in metric_cols:
                    if pd.notna(vm_row[col]):
                        vm_entry[section][col] = vm_row[col]

    # Get VM data
    vm_cpu = self.get_vm_cpu_utilization() if 'vcpu' in self.dataframes else None
    vm_memory = self.get_vm_memory_utilization() if 'vmemory' in self.dataframes else None
    vm_network = self.get_network_utilization() if 'vnetwork' in self.dataframes else None

    # NOTE(review): the CPU source is gated on 'vcpu' and needs a column whose
    # name contains 'name' - confirm this matches what get_vm_cpu_utilization()
    # actually returns.
    merge(
        vm_cpu, 'cpu',
        lambda low: 'cpu' in low and mentions(low, ('usage', 'util', 'pct')),
    )
    merge(
        vm_memory, 'memory',
        lambda low: mentions(low, ('mem', 'ram')) and mentions(low, ('usage', 'util', 'pct', 'size')),
    )
    merge(
        vm_network, 'network',
        lambda low: mentions(low, ('network', 'nic', 'adapters')),
    )

    return report
def print_aggregated_cluster_report(self) -> None:
    """
    Print the report built by get_aggregated_cluster_report() to the console.

    For every cluster the non-empty CPU and memory metric dictionaries are
    listed, followed by the same two sections for each of its hosts. A blank
    line is printed after each cluster. When the report has no clusters a
    single "No cluster data available." line is printed instead.
    """
    report = self.get_aggregated_cluster_report()
    clusters = report.get('clusters') if report else None
    if not clusters:
        print("No cluster data available.")
        return

    def show_metrics(entry):
        # Print the CPU section and then the memory section; empty ones are skipped.
        for key, title in (('cpu', " CPU Utilization:"), ('memory', " Memory Utilization:")):
            metrics = entry.get(key)
            if not metrics:
                continue
            print(title)
            for metric, value in metrics.items():
                print(f" {metric}: {value}")

    print("\n===== CLUSTER UTILIZATION REPORT =====\n")
    for cluster_name, cluster_data in clusters.items():
        print(f"Cluster: {cluster_name}")
        show_metrics(cluster_data)

        hosts = cluster_data.get('hosts')
        if hosts:
            print(" Hosts:")
            for host_name, host_data in hosts.items():
                print(f" Host: {host_name}")
                show_metrics(host_data)

        print()  # Blank line between clusters
def print_aggregated_vm_report(self, top_n: int = 10, by: str = 'cpu') -> None:
    """
    Print a formatted VM utilization report, showing the top N VMs by utilization directly from vInfo.

    VMs are ranked per cluster and then per host using the 'CPUs' column when
    by is 'cpu' (case-insensitive) and the 'Memory' column for any other value.
    The metric is converted to numbers for ranking; values that cannot be
    converted, or are missing, rank as 0. The loaded vInfo DataFrame itself is
    not modified.

    Args:
        top_n: Number of top VMs to show
        by: Metric to sort by ('cpu' or 'memory')
    """
    if 'vinfo' not in self.dataframes:
        print("No VM data available - vInfo data missing.")
        return

    df = self.dataframes['vinfo']
    print(f"Found {len(df)} VMs in vInfo data")

    vm_col, cluster_col, host_col = 'VM', 'Cluster', 'Host'
    missing_cols = [col for col in (vm_col, cluster_col, host_col) if col not in df.columns]
    if missing_cols:
        print(f"Missing required columns: {', '.join(missing_cols)}")
        return

    if by.lower() == 'cpu':
        metric_col, metric_label = 'CPUs', 'CPU Count'
    else:  # memory
        metric_col, metric_label = 'Memory', 'Memory (MB)'

    if metric_col not in df.columns:
        print(f"Metric column '{metric_col}' not found in vInfo data")
        return

    # Coerce into a separate Series so the shared vInfo frame keeps its
    # original values.
    try:
        metric_values = pd.to_numeric(df[metric_col], errors='coerce')
    except (TypeError, ValueError) as e:
        print(f"Error converting {metric_col} to numeric: {e}")
        return

    def print_groups(group_col, group_label):
        # List the top_n VMs of every non-null value found in group_col.
        for group in df[group_col].dropna().unique():
            members = df[group_col] == group
            ranked = [
                (vm_name, value if pd.notna(value) else 0)
                for vm_name, value in zip(df.loc[members, vm_col], metric_values[members])
            ]
            # list.sort is stable, so ties keep their original row order.
            ranked.sort(key=lambda pair: pair[1], reverse=True)
            top_vms = ranked[:top_n]
            print(f"\n{group_label}: {group}")
            print(f" Top {len(top_vms)} VMs by {by.upper()}:")
            for vm_name, value in top_vms:
                print(f" {vm_name}: {value} {metric_label}")

    print(f"\n===== TOP {top_n} VMs BY {by.upper()} =====\n")
    print("BY CLUSTER:")
    print_groups(cluster_col, "Cluster")
    print("\nBY HOST:")
    print_groups(host_col, "Host")
def get_overall_utilization_summary(self) -> Dict:
    """
    Create a summary of overall utilization across the environment.

    CPU and memory figures come from the host data when it is loaded and
    non-empty; only otherwise are the per-VM CPU / memory frames used.
    Columns are located by case-insensitive substring matching on their
    names, and the first matching column wins.

    Returns:
        Dictionary with summarized utilization metrics:
            'cpu' / 'memory': {'overall_avg': mean or None,
                               'by_cluster': {cluster: mean},
                               'by_host': {host: value}}
            'network': {'overall_stats': {...}, 'by_cluster': {...},
                        'by_host': {...}} where each stats dict holds
                        'total_nics', 'connected_nics' and
                        'connection_rate' (None when there are no NICs).
        Entries that cannot be computed are left at their empty defaults.
    """
    util_markers = ('usage', 'util', 'pct')
    connected_values = ['true', 'connected', 'yes', '1']

    def first_column(frame, keywords, also=None):
        """Return the first column whose lower-cased name contains one of
        `keywords` (and, if given, one of `also`); None when none match."""
        for col in frame.columns:
            name = str(col).lower()
            if not any(word in name for word in keywords):
                continue
            if also is None or any(word in name for word in also):
                return col
        return None

    def usable(frame):
        """True when a frame was produced and holds at least one row."""
        return frame is not None and not frame.empty

    def mean_of(series):
        """Average of a group's values."""
        return series.mean()

    def first_of(series):
        """First value of a group (host frames are read one row per host)."""
        return series.iloc[0]

    def per_group(values, keys, reduce):
        """Reduce `values` for every distinct non-null key, skipping NaN results."""
        grouped = {}
        for key in keys.dropna().unique():
            stat = reduce(values[keys == key])
            if pd.notna(stat):
                grouped[key] = stat
        return grouped

    def fill_metric(target, frame, metric_words, host_words, host_reduce):
        """Populate one of the 'cpu' / 'memory' sections from `frame`."""
        util_col = first_column(frame, metric_words, also=util_markers)
        if util_col is None:
            return
        # Coerce so that a text-typed column yields NaN instead of making
        # .mean() raise; unparseable cells are simply ignored.
        values = pd.to_numeric(frame[util_col], errors='coerce')
        overall = values.mean()
        if pd.notna(overall):
            target['overall_avg'] = overall
        cluster_col = first_column(frame, ('cluster',))
        if cluster_col is not None:
            target['by_cluster'] = per_group(values, frame[cluster_col], mean_of)
        host_col = first_column(frame, host_words)
        if host_col is not None:
            target['by_host'] = per_group(values, frame[host_col], host_reduce)

    def nic_stats(status):
        """Count non-null and connected entries of a connection-status series."""
        total = int(status.count())
        connected = int(status.astype(str).str.lower().isin(connected_values).sum())
        return {
            'total_nics': total,
            'connected_nics': connected,
            'connection_rate': connected / total if total > 0 else None,
        }

    summary = {
        'cpu': {'overall_avg': None, 'by_cluster': {}, 'by_host': {}},
        'memory': {'overall_avg': None, 'by_cluster': {}, 'by_host': {}},
        'network': {'overall_stats': {}, 'by_cluster': {}, 'by_host': {}},
    }

    # Fetch every available frame up front, exactly once each.
    loaded = self.dataframes
    host_df = self.get_host_utilization() if 'vhost' in loaded else None
    vm_cpu_df = self.get_vm_cpu_utilization() if 'vcpu' in loaded else None
    vm_memory_df = self.get_vm_memory_utilization() if 'vmemory' in loaded else None
    vm_network_df = self.get_network_utilization() if 'vnetwork' in loaded else None

    host_frame_words = ('host', 'name', 'esx')
    vm_frame_words = ('host', 'esx')

    # CPU: host data takes precedence; VM data is only a fallback.
    if usable(host_df):
        fill_metric(summary['cpu'], host_df, ('cpu',), host_frame_words, first_of)
    elif usable(vm_cpu_df):
        fill_metric(summary['cpu'], vm_cpu_df, ('cpu',), vm_frame_words, mean_of)

    # Memory: same precedence rule as CPU.
    if usable(host_df):
        fill_metric(summary['memory'], host_df, ('mem', 'ram'), host_frame_words, first_of)
    elif usable(vm_memory_df):
        fill_metric(summary['memory'], vm_memory_df, ('mem', 'ram'), vm_frame_words, mean_of)

    # Network: everything hinges on finding a connection-status column.
    if usable(vm_network_df):
        conn_col = first_column(
            vm_network_df,
            ('nic', 'adapter', 'connected'),
            also=('connected', 'status'),
        )
        if conn_col is not None:
            status = vm_network_df[conn_col]
            network = summary['network']
            network['overall_stats'] = nic_stats(status)
            groupings = (('by_cluster', ('cluster',)), ('by_host', vm_frame_words))
            for section, words in groupings:
                group_col = first_column(vm_network_df, words)
                if group_col is None:
                    continue
                keys = vm_network_df[group_col]
                for key in keys.dropna().unique():
                    network[section][key] = nic_stats(status[keys == key])

    return summary
def print_overall_utilization_summary(self) -> None:
    """
    Print a formatted summary of overall utilization to the console.

    The data comes from get_overall_utilization_summary(). Three sections
    are written in order: CPU, memory, and network connectivity. Missing
    averages and connection rates are shown as "N/A"; per-cluster and
    per-host listings are omitted when empty.
    """
    summary = self.get_overall_utilization_summary()

    def as_rate(rate):
        """Render a 0..1 fraction as a percentage, or N/A when absent."""
        return "N/A" if rate is None else f"{rate * 100:.2f}%"

    def emit_percent_section(title, section):
        """Write one CPU/memory style section followed by a blank line."""
        print(title)
        overall = section.get('overall_avg')
        overall_text = "N/A" if overall is None else f"{overall:.2f}%"
        print(f" Overall Average: {overall_text}")
        for heading, key in ((" By Cluster:", 'by_cluster'), (" By Host:", 'by_host')):
            entries = section.get(key)
            if not entries:
                continue
            print(heading)
            for name, value in entries.items():
                print(f" {name}: {value:.2f}%")
        print()  # blank separator line

    print("\n===== OVERALL UTILIZATION SUMMARY =====\n")
    emit_percent_section("CPU UTILIZATION:", summary['cpu'])
    emit_percent_section("MEMORY UTILIZATION:", summary['memory'])

    print("NETWORK CONNECTIVITY:")
    network = summary.get('network') or {}
    overall_stats = network.get('overall_stats')
    if overall_stats:
        print(f" Total NICs: {overall_stats.get('total_nics', 'N/A')}")
        print(f" Connected NICs: {overall_stats.get('connected_nics', 'N/A')}")
        print(f" Connection Rate: {as_rate(overall_stats.get('connection_rate'))}")
    else:
        print(" Network information not available")

    for heading, key in ((" By Cluster:", 'by_cluster'), (" By Host:", 'by_host')):
        groups = network.get(key)
        if not groups:
            continue
        print(heading)
        for name, stats in groups.items():
            connected = stats.get('connected_nics', 'N/A')
            total = stats.get('total_nics', 'N/A')
            rate_str = as_rate(stats.get('connection_rate'))
            print(f" {name}: {connected}/{total} NICs connected ({rate_str})")
@click.command()
@click.option(
    '--directory', '-d',
    type=click.Path(exists=True, file_okay=False, dir_okay=True),
    help='Directory containing RVTools CSV files',
)
@click.option(
    '--top-n', '-n',
    type=int,
    default=10,
    help='Number of top VMs to display in the VM utilization report',
)
def main(directory, top_n):
    """
    Parse and analyze RVTools CSV files for infrastructure utilization metrics.
    If no directory is provided, the script will prompt for one.
    """
    # NOTE: the docstring above doubles as the command's --help text, so its
    # wording is part of the program's output.

    # Fall back to an interactive prompt when -d/--directory was omitted;
    # the prompt applies the same "existing directory" validation.
    if not directory:
        existing_dir = click.Path(exists=True, file_okay=False, dir_okay=True)
        directory = click.prompt(
            "Please enter the path to the directory containing RVTools CSV files",
            type=existing_dir,
        )

    click.echo(f"Analyzing RVTools CSV files in: {directory}")
    analyzer = RVToolsAnalyzer(directory)

    # Diagnostic listing: which of the expected exports were located.
    click.echo("\nLoaded RVTools files:")
    for key, file_path in analyzer.file_mapping.items():
        shown = os.path.basename(file_path) if file_path else "Not found"
        click.echo(f" {key}: {shown}")

    # Reports are emitted in a fixed order: overall, cluster, then top-N VMs.
    click.echo("\nGenerating overall utilization summary...")
    analyzer.print_overall_utilization_summary()

    click.echo("\nGenerating cluster utilization report...")
    analyzer.print_aggregated_cluster_report()

    for metric, label in (('cpu', 'CPU'), ('memory', 'memory')):
        click.echo(f"\nGenerating top {top_n} VMs by {label} utilization...")
        analyzer.print_aggregated_vm_report(top_n=top_n, by=metric)

    click.echo("\nAnalysis complete!")


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment