Created
April 9, 2025 09:15
-
-
Save FlorianHeigl/14d6ff239099275141937da2f3172738 to your computer and use it in GitHub Desktop.
x3650m5 fan control with amd rocm-smi parser
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
use strict; | |
use warnings; | |
use List::Util qw[min max]; | |
# IBM x3650 port notes: this port was made in an attempt to quiet down an IBM System x3650 M4.
# The porting involved changing the raw commands and removing GetFanRPM, as it was
# useless in my scenario and IBM lists its fans as 1A 1B 2A 2B.
# Also, the IMM2 maintains partial control over the actual fan speed: for example, setting
# the fans to 30% results in an actual fan speed of roughly 24%, and sometimes the script
# fails to update and the IMM2 takes back control of the fan speed.
# Overall, the real speed settings feel very coarse.
# This script controls IPMI compatible server's fan speeds in response to CPU Temperatures provided by lm-sensors. | |
# This script has been tested on a Dell PowerEdge R210 II, but should work on any IPMI compatible server. | |
# | |
# NOTE: The script puts your server into "Full Fan Speed Mode", and then modifies what "Full Speed" means, | |
# You have to manually use IPMI to set it to e.g. "Optimal" when you're not using the script. | |
# | |
# NOTE: You use this script at your own risk, and no warranty is provided. Do not use in production environments.
# | |
# Maintainer: Brian Wilson <[email protected]> | |
# Original Author: Layla Mah <[email protected]> | |
# Original Version: https://github.com/missmah/ipmi_tools | |
# AMD GPU Hack: Florian Heigl <[email protected]> | |
# | |
# What's new? | |
# | |
# The original script provided a "step" approach where fans would take large "steps" depending on the temps. | |
# In this version, scalar equations are generated to provide an easy (and quiet) slope to follow to the next step. | |
# These equations are simple Y=mx+b linear slopes that effectively provide a fan "curve" based on the entries in the
# %cpu_temp_to_fan_speed hash table. | |
# | |
# More documentation is planned, however I am available to answer basic configuration questions in the mean time. | |
# ---------------------------------------------------------------------------
# System configuration
# ---------------------------------------------------------------------------

# Host name as printed by the `hostname` command, with every newline removed.
my $hostname = `hostname`;
$hostname =~ tr/\n//d;

my $number_of_cpus     = 2;    # Number of CPUs to search for
my $number_of_gpus     = 1;    # Number of GPUs to search for
my $number_of_fans     = 6;    # Number of FANs to search for
my $number_of_fanbanks = 2;    # Number of BANKs of FANs to update
my $min_temp_change    = 0;    # *C minimum change to actually cause a fan speed update
my $seconds_to_sleep   = 3;    # Number of seconds to sleep between update loops

# ---------------------------------------------------------------------------
# IPMI configuration
# ---------------------------------------------------------------------------
# Remote-access settings are left disabled: the script runs directly on the
# host (proxmox) as a service, so a bare "ipmitool" is enough.
#my $ipmi_username = "username_goes_here";
#my $ipmi_password = "password_goes_here";
#my $ipmi_ipaddress = "ip_address_goes_here";
#my $ipmi_connectmode = "open";
#my $ipmi_preamble = "ipmitool -I $ipmi_connectmode";
#my $ipmi_preamble = 'ipmitool -I $ipmi_connectmode -U $ipmi_username -P $ipmi_password -H $ipmi_ipaddress';
my $ipmi_cmd_listall = "sdr list full";
my $ipmi_preamble    = "ipmitool";

# ---------------------------------------------------------------------------
# CPU temperature -> fan speed mapping
# ---------------------------------------------------------------------------
# Each key is a CPU temperature in *C and each value is the fan speed wanted at
# that temperature. CalculateScalars() joins neighbouring points with straight
# lines and stores the resulting [slope, intercept] pairs in %cpu_temp_scale.
my %cpu_temp_to_fan_speed = (
    10 => 5,
    40 => 25,
    55 => 50,
    70 => 150,
    80 => 250,
);
my %cpu_temp_scale;

# ---------------------------------------------------------------------------
# Runtime state (below this line follows the actual implementation)
# ---------------------------------------------------------------------------
my $g_current_fan_duty_cycle = 0;    # last speed handed to SetFanSpeed that was applied
my $g_current_cpu_temp       = 0;    # most recent CPU reading
my $g_current_gpu_temp       = 0;    # most recent GPU reading
my $g_last_set_cpu_temp      = 0;    # CPU reading at the last fan update
my $g_last_set_gpu_temp      = 0;    # GPU reading at the last fan update
# Internal_DoSetFanSpeed( $fan_speed )
#
# Sends one speed value to every fan bank by running
#   <$ipmi_preamble> raw 0x3a 0x07 <bank> <speed> 0x01
# once for each bank from 1 to $number_of_fanbanks.
#
# $fan_speed must be a non-negative decimal integer or a 0x-prefixed hex
# value. Anything else is rejected with a warning and no command is run.
# A command that exits non-zero is reported with a warning instead of being
# silently ignored. Returns nothing.
sub Internal_DoSetFanSpeed
{
    my ( $fan_speed ) = @_;

    # The value is pasted into a shell command line below, so only let
    # through something that is unmistakably a number.
    unless ( defined $fan_speed and $fan_speed =~ /\A(?:\d+|0[xX][0-9a-fA-F]+)\z/ )
    {
        my $shown = defined $fan_speed ? $fan_speed : 'undef';
        warn "Refusing to set fan speed: '$shown' is not a valid speed value\n";
        return;
    }

    # Sets the speed for each individual fan bank
    for my $bank ( 1 .. $number_of_fanbanks )
    {
        print "Setting FanBank n°$bank speed at $fan_speed\n";
        #ipmitool raw 0x3a 0x07 1 100 0
        `$ipmi_preamble raw 0x3a 0x07 $bank $fan_speed 0x01> /dev/null 2>&1`;

        # $? holds the wait status of the command that just ran.
        warn "Fan bank $bank: '$ipmi_preamble raw' failed (wait status $?)\n"
            if $? != 0;
    }

    return;
}
# SetFanSpeed( $fan_speed )
#
# Applies $fan_speed through Internal_DoSetFanSpeed(), but only when the CPU or
# the GPU temperature has moved by more than $min_temp_change since the last
# applied update. When it does apply, it records the current temperatures and
# the new speed in the $g_* state variables.
sub SetFanSpeed
{
    my ( $fan_speed ) = @_;

    my $cpu_temp_difference = $g_current_cpu_temp - $g_last_set_cpu_temp;
    my $gpu_temp_difference = $g_current_gpu_temp - $g_last_set_gpu_temp;

    # Nothing to do while both readings stay within the dead band.
    my $cpu_moved = abs( $cpu_temp_difference ) > $min_temp_change;
    my $gpu_moved = abs( $gpu_temp_difference ) > $min_temp_change;
    return unless $cpu_moved or $gpu_moved;

    print
        "\n",
        "********************** Updating Fan Speeds **********************\n",
        "We last updated fan speed $cpu_temp_difference *C ago (CPU Temperature).\n",
        "We last updated fan speed $gpu_temp_difference *C ago (GPU Temperature).\n",
        "Current CPU Temperature is $g_current_cpu_temp *C.\n",
        "Current GPU Temperature is $g_current_gpu_temp *C.\n",
        "*****************************************************************\n";

    # Remember what this update was based on before touching the fans.
    ( $g_last_set_cpu_temp, $g_last_set_gpu_temp ) = ( $g_current_cpu_temp, $g_current_gpu_temp );
    $g_current_fan_duty_cycle = $fan_speed;

    Internal_DoSetFanSpeed( $fan_speed );
}
# GetCPUTemp( [$sensors_output] )
#
# Returns the highest CPU package temperature found in lm-sensors output, or 0
# when no "Package" line with a readable temperature is present.
#
# $sensors_output is optional. When omitted, the `sensors` command is run and
# its output is parsed; passing the text in lets the parser be exercised
# without the command.
#
# Only lines containing "Package" are considered. On each one the first number
# after a colon (with an optional leading "+") is taken as the reading, e.g.
#   Package id 0:  +45.0 C  (high = +80.0 C, crit = +100.0 C)   ->  45
# If there are multiple CPUs, the highest reading wins.
sub GetCPUTemp
{
    my ( $sensors_output ) = @_;

    $sensors_output = `sensors` unless defined $sensors_output;
    $sensors_output = ''        unless defined $sensors_output;    # command could not be run

    my $current_cpu_temp = 0;
    foreach my $line ( split /\n/, $sensors_output )
    {
        next unless $line =~ /Package/;

        # Parse in Perl rather than spawning echo/sed per value; a line
        # without a usable number is skipped instead of fed to max().
        next unless $line =~ /:\s*\+?(\d+(?:\.\d+)?)/;

        $current_cpu_temp = max( $1, $current_cpu_temp );
    }

    return $current_cpu_temp;
}
# GetGPUTemp( [$smi_output] )
#
# Returns the highest GPU temperature (whole degrees) found in rocm-smi output,
# or 0 when no device row with a readable temperature is present.
#
# $smi_output is optional. When omitted, the `rocm-smi` command is run and its
# output is parsed; passing the text in lets the parser be exercised without
# the command.
#
# A device row is any line that starts with a digit. The temperature is read
# from the 5th whitespace-separated column and truncated at the decimal point.
# Each row is handled on its own: stripping newlines from the combined output
# would glue the readings of several GPUs together ("45" and "61" -> "4561").
sub GetGPUTemp
{
    my ( $smi_output ) = @_;

    $smi_output = `rocm-smi` unless defined $smi_output;
    $smi_output = ''         unless defined $smi_output;    # command could not be run

    my $current_gpu_temp = 0;
    foreach my $line ( split /\n/, $smi_output )
    {
        next unless $line =~ /\A[0-9]/;

        # split ' ' splits on runs of whitespace and ignores leading blanks.
        my @columns = split ' ', $line;
        next unless defined $columns[4] and $columns[4] =~ /\A(\d+)/;

        $current_gpu_temp = max( $1, $current_gpu_temp );
    }

    return $current_gpu_temp;
}
# CalculateScalars()
#
# Builds %cpu_temp_scale from %cpu_temp_to_fan_speed. The temperature points
# are walked in ascending numeric order; for every point except the lowest,
# the straight line (speed = slope * temp + intercept) joining it to the
# previous point is stored as [slope, intercept] under the higher temperature.
#
# Any existing entries in %cpu_temp_scale are discarded first, so calling this
# again after editing the table leaves no stale segments. Returns nothing.
sub CalculateScalars
{
    %cpu_temp_scale = ();

    my ( $prev_temp, $prev_speed );

    # Numeric sort: the default string sort would misplace keys that do not
    # all have the same number of digits (e.g. "100" before "40").
    foreach my $temp ( sort { $a <=> $b } keys %cpu_temp_to_fan_speed )
    {
        my $speed = $cpu_temp_to_fan_speed{$temp};

        # Two keys that compare numerically equal (e.g. "10" and "10.0")
        # would give a zero-width segment; skip it rather than divide by 0.
        if ( defined $prev_temp and $temp != $prev_temp )
        {
            my $slope     = ( $speed - $prev_speed ) / ( $temp - $prev_temp );
            my $intercept = $speed - ( $slope * $temp );
            $cpu_temp_scale{$temp} = [ $slope, $intercept ];
        }

        ( $prev_temp, $prev_speed ) = ( $temp, $speed );
    }

    return;
}
# UpdateFanSpeed()
#
# One control step: reads the CPU and GPU temperatures, stores them in
# $g_current_cpu_temp / $g_current_gpu_temp, derives the wanted fan speed from
# the CPU temperature and hands it to SetFanSpeed().
#
# Speed selection:
#   * at or above the highest temperature in %cpu_temp_to_fan_speed, the speed
#     configured for that highest point is used. Without this no segment
#     matches and the speed would stay 0, i.e. the fans would be told to stop
#     exactly when the CPU is hottest;
#   * at or below the lowest temperature, the speed configured for the lowest
#     point is used, because extending the first line downwards goes negative;
#   * in between, the line segment from %cpu_temp_scale whose upper
#     temperature is the smallest one not below the reading is evaluated.
#
# NOTE(review): the GPU temperature is recorded and can trigger an update in
# SetFanSpeed(), but the speed itself is derived from the CPU temperature only.
sub UpdateFanSpeed
{
    my $current_cpu_temp = GetCPUTemp();
    my $current_gpu_temp = GetGPUTemp();
    $g_current_cpu_temp = $current_cpu_temp;
    $g_current_gpu_temp = $current_gpu_temp;
    print "Maximum CPU Temperature Seen: $current_cpu_temp degrees C.\n";
    print "Maximum GPU Temperature Seen: $current_gpu_temp degrees C.\n";

    my $desired_fan_speed = 0;
    my $calculated_speed  = 0;

    # Numeric order, so the table keeps working with keys such as 5 or 100.
    my @table_temps = sort { $a <=> $b } keys %cpu_temp_to_fan_speed;

    if ( @table_temps )
    {
        my ( $lowest_temp, $highest_temp ) = @table_temps[ 0, -1 ];

        if ( $current_cpu_temp >= $highest_temp )
        {
            $calculated_speed = $cpu_temp_to_fan_speed{$highest_temp};
        }
        elsif ( $current_cpu_temp <= $lowest_temp )
        {
            $calculated_speed = $cpu_temp_to_fan_speed{$lowest_temp};
        }
        else
        {
            foreach my $upper_temp ( sort { $a <=> $b } keys %cpu_temp_scale )
            {
                next if $current_cpu_temp > $upper_temp;

                my ( $slope, $intercept ) = @{ $cpu_temp_scale{$upper_temp} };
                $calculated_speed = ( $slope * $current_cpu_temp ) + $intercept;
                last;    # first segment that covers the reading
            }
        }

        $desired_fan_speed = sprintf( "%.0f", $calculated_speed );
    }

    print "Current Fan Duty Cycle: $g_current_fan_duty_cycle%\n";
    print "Desired Fan Duty Cycle: $desired_fan_speed%\n";

    # Output basic InfluxDB metrics to /tmp/fan_speed_telegraf
    #my $speed_raw = sprintf("%x", $calculated_speed);
    #open(FH, '>', '/tmp/fan_speed_telegraf') or die $!;
    #print FH "fans,host=$hostname speed_percent=$calculated_speed\n";
    #print FH "fans,host=$hostname speed_raw=$speed_raw\n";
    #close(FH);

    SetFanSpeed( $desired_fan_speed );
}
# Entry point: build the fan curve once, then run one control step every
# $seconds_to_sleep seconds, forever.
print "Starting.\n";
CalculateScalars();

my $banner_rule = "=================================================================\n";

for ( ;; )
{
    # Five blank lines separate one iteration's output from the next.
    print "\n" x 5;

    print $banner_rule, "Calling UpdateFanSpeed()...\n", $banner_rule;
    UpdateFanSpeed();

    print $banner_rule,
          "Update Complete - going to sleep for $seconds_to_sleep seconds...\n",
          $banner_rule;
    sleep $seconds_to_sleep;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I have no load and wanted it silent, my GPU is using 20W at the desktop
Things that I'm aware of you would need to fix if you want to use this reliably:
\$
in the multi gpu iteration. Besides that, perl code as you can see. Just too many lines for the work.
Fake-Perl code, to be honest, the whole thing is calling shell more often than I can say "i want to sleep".
I'm not gonna do this since I was honestly just on the way of
But the fan control? it's quite awesome.