AMD System Management Interface (AMD SMI) Go library for unified GPU and CPU management and monitoring
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Power consumption monitoring, thermal sensors, fan control, and power limit management for AMD GPUs and CPUs. Essential for system thermal management and power optimization.
Get comprehensive power consumption data including current, average, and maximum power draw.
amdsmi_status_t amdsmi_get_power_info(amdsmi_processor_handle processor_handle,
amdsmi_power_info_t* power_info);

Power Information Structure:
typedef struct {
uint64_t current_socket_power; // Current socket power (W)
uint64_t average_socket_power; // Average socket power (W)
uint64_t max_socket_power_limit; // Maximum power limit (W)
uint64_t min_socket_power_limit; // Minimum power limit (W)
} amdsmi_power_info_t;

Usage Example:
amdsmi_power_info_t power_info;
amdsmi_status_t status = amdsmi_get_power_info(gpu_handle, &power_info);
if (status == AMDSMI_STATUS_SUCCESS) {
printf("Power Status:\n");
printf(" Current: %lu W\n", power_info.current_socket_power);
printf(" Average: %lu W\n", power_info.average_socket_power);
printf(" Max Limit: %lu W\n", power_info.max_socket_power_limit);
printf(" Min Limit: %lu W\n", power_info.min_socket_power_limit);
}

Monitor various temperature sensors across the GPU die and components.
amdsmi_status_t amdsmi_get_temp_metric(amdsmi_processor_handle processor_handle,
amdsmi_temperature_type_t sensor_type,
amdsmi_temperature_metric_t metric,
int64_t* temperature);

Temperature Sensor Types:
typedef enum {
AMDSMI_TEMP_TYPE_EDGE, // Edge temperature sensor
AMDSMI_TEMP_TYPE_JUNCTION, // Junction temperature sensor
AMDSMI_TEMP_TYPE_MEMORY, // Memory temperature sensor
AMDSMI_TEMP_TYPE_HBM_0, // HBM instance 0
AMDSMI_TEMP_TYPE_HBM_1, // HBM instance 1
AMDSMI_TEMP_TYPE_HBM_2, // HBM instance 2
AMDSMI_TEMP_TYPE_HBM_3, // HBM instance 3
AMDSMI_TEMP_TYPE_PLX // PLX sensor
} amdsmi_temperature_type_t;

Temperature Metrics:
typedef enum {
AMDSMI_TEMP_CURRENT, // Current temperature
AMDSMI_TEMP_MAX, // Maximum recorded temperature
AMDSMI_TEMP_MIN, // Minimum recorded temperature
AMDSMI_TEMP_MAX_HYST, // Maximum temperature hysteresis
AMDSMI_TEMP_MIN_HYST, // Minimum temperature hysteresis
AMDSMI_TEMP_CRITICAL, // Critical temperature threshold
AMDSMI_TEMP_CRITICAL_HYST, // Critical temperature hysteresis
AMDSMI_TEMP_EMERGENCY, // Emergency temperature threshold
AMDSMI_TEMP_EMERGENCY_HYST // Emergency temperature hysteresis
} amdsmi_temperature_metric_t;

Usage Example:
// Get current edge temperature
int64_t edge_temp;
amdsmi_status_t status = amdsmi_get_temp_metric(gpu_handle,
AMDSMI_TEMP_TYPE_EDGE,
AMDSMI_TEMP_CURRENT,
&edge_temp);
if (status == AMDSMI_STATUS_SUCCESS) {
printf("GPU Edge Temperature: %ld°C\n", edge_temp / 1000); // Convert from millicelsius
}
// Get critical temperature threshold
int64_t critical_temp;
status = amdsmi_get_temp_metric(gpu_handle,
AMDSMI_TEMP_TYPE_EDGE,
AMDSMI_TEMP_CRITICAL,
&critical_temp);
if (status == AMDSMI_STATUS_SUCCESS) {
printf("Critical Temperature: %ld°C\n", critical_temp / 1000);
}

Monitor fan speeds and RPM values for cooling system management.
amdsmi_status_t amdsmi_get_fan_speed(amdsmi_processor_handle processor_handle,
uint32_t sensor_idx,
int64_t* speed);

Parameters:
- processor_handle: GPU handle
- sensor_idx: Fan sensor index (typically 0 for primary fan)
- speed: Output fan speed in RPM

amdsmi_status_t amdsmi_get_fan_speed_max(amdsmi_processor_handle processor_handle,
uint32_t sensor_idx,
uint64_t* max_speed);

Usage Example:
// Get current fan speed
int64_t fan_speed;
amdsmi_status_t status = amdsmi_get_fan_speed(gpu_handle, 0, &fan_speed);
if (status == AMDSMI_STATUS_SUCCESS) {
printf("Fan Speed: %ld RPM\n", fan_speed);
}
// Get maximum fan speed
uint64_t max_fan_speed;
status = amdsmi_get_fan_speed_max(gpu_handle, 0, &max_fan_speed);
if (status == AMDSMI_STATUS_SUCCESS) {
printf("Max Fan Speed: %lu RPM\n", max_fan_speed);
double fan_percent = (double)fan_speed / max_fan_speed * 100.0;
printf("Fan Usage: %.1f%%\n", fan_percent);
}

Set and get power consumption limits for power management.
amdsmi_status_t amdsmi_set_power_cap(amdsmi_processor_handle processor_handle,
uint32_t sensor_ind,
uint64_t cap);

amdsmi_status_t amdsmi_get_power_cap_info(amdsmi_processor_handle processor_handle,
uint32_t sensor_ind,
amdsmi_power_cap_info_t* info);

Power Cap Information Structure:
typedef struct {
uint64_t power_cap; // Current power cap (W)
uint64_t default_power_cap; // Default power cap (W)
uint64_t dpm_cap; // DPM power cap (W)
uint64_t min_power_cap; // Minimum power cap (W)
uint64_t max_power_cap; // Maximum power cap (W)
} amdsmi_power_cap_info_t;

Usage Example:
// Get current power cap info
amdsmi_power_cap_info_t cap_info;
amdsmi_status_t status = amdsmi_get_power_cap_info(gpu_handle, 0, &cap_info);
if (status == AMDSMI_STATUS_SUCCESS) {
printf("Power Cap Info:\n");
printf(" Current: %lu W\n", cap_info.power_cap);
printf(" Default: %lu W\n", cap_info.default_power_cap);
printf(" Range: %lu - %lu W\n", cap_info.min_power_cap, cap_info.max_power_cap);
}
// Set new power limit (requires appropriate permissions)
uint64_t new_cap = 200; // 200W
status = amdsmi_set_power_cap(gpu_handle, 0, new_cap);
if (status == AMDSMI_STATUS_SUCCESS) {
printf("Power cap set to %lu W\n", new_cap);
} else if (status == AMDSMI_STATUS_PERMISSION) {
printf("Insufficient permissions to set power cap\n");
}

Monitor GPU voltage levels and voltage curves.
amdsmi_status_t amdsmi_get_gpu_volt_metric(amdsmi_processor_handle processor_handle,
amdsmi_voltage_type_t sensor_type,
amdsmi_voltage_metric_t metric,
int64_t* voltage);

Voltage Types:
typedef enum {
AMDSMI_VOLT_TYPE_VDDGFX, // Graphics voltage
AMDSMI_VOLT_TYPE_VDDNB, // Northbridge voltage
AMDSMI_VOLT_TYPE_VDDMEM // Memory voltage
} amdsmi_voltage_type_t;

Voltage Metrics:
typedef enum {
AMDSMI_VOLT_CURRENT, // Current voltage
AMDSMI_VOLT_MAX, // Maximum voltage
AMDSMI_VOLT_MIN // Minimum voltage
} amdsmi_voltage_metric_t;

import amdsmi
gpu_handles = amdsmi.amdsmi_get_processor_handles(amdsmi.AmdSmiProcessorType.AMD_GPU)
if gpu_handles:
gpu_handle = gpu_handles[0]
# Get power information
power_info = amdsmi.amdsmi_get_power_info(gpu_handle)
print(f"Current Power: {power_info['current_socket_power']}W")
print(f"Max Power Limit: {power_info['max_socket_power_limit']}W")
# Get temperature
temp = amdsmi.amdsmi_get_temp_metric(gpu_handle,
amdsmi.AmdSmiTemperatureType.EDGE,
amdsmi.AmdSmiTemperatureMetric.CURRENT)
print(f"GPU Temperature: {temp // 1000}°C")
# Get fan speed
fan_speed = amdsmi.amdsmi_get_fan_speed(gpu_handle, 0)
print(f"Fan Speed: {fan_speed} RPM")
# Get power cap info
power_cap_info = amdsmi.amdsmi_get_power_cap_info(gpu_handle, 0)
print(f"Current Power Cap: {power_cap_info['power_cap']}W")

import "github.com/ROCm/amdsmi"
// Get power and thermal data for each GPU
for i := 0; i < int(goamdsmi.GO_gpu_num_monitor_devices()); i++ {
// Get power consumption
power := goamdsmi.GO_gpu_dev_power_ave_get(i)
fmt.Printf("GPU %d Average Power: %d W\n", i, power)
// Get temperature
temp := goamdsmi.GO_gpu_dev_temp_get(i, goamdsmi.TEMPERATURE_TYPE_EDGE)
fmt.Printf("GPU %d Temperature: %d°C\n", i, temp/1000)
// Get fan speed
fanSpeed := goamdsmi.GO_gpu_dev_fan_speed_get(i, 0)
fmt.Printf("GPU %d Fan Speed: %d RPM\n", i, fanSpeed)
}

use amdsmi::{get_power_info, get_temp_metric, get_fan_speed};
use amdsmi::{TemperatureType, TemperatureMetric};
// Get comprehensive thermal and power data
let power_info = get_power_info(gpu_handle)?;
println!("Current Power: {}W", power_info.current_socket_power);
let edge_temp = get_temp_metric(gpu_handle,
TemperatureType::Edge,
TemperatureMetric::Current)?;
println!("GPU Temperature: {}°C", edge_temp / 1000);
let fan_speed = get_fan_speed(gpu_handle, 0)?;
println!("Fan Speed: {} RPM", fan_speed);

Install with Tessl CLI
npx tessl i tessl/go-amdsmi