or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

cpu-management.md
device-info.md
events.md
gpu-performance.md
index.md
initialization.md
memory.md
performance-control.md
power-thermal.md
topology-ras.md

docs/events.md

0

# Events and Notifications

1

2

This section covers real-time event monitoring, GPU reset detection, and system state-change notifications, which are essential for responsive monitoring applications and automated system management.

3

4

## Capabilities

5

6

### Event Notification

7

8

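Get real-time notifications of GPU events and state changes. The call waits up to `timeout_ms` milliseconds for events. On input, `*num_elem` is the capacity of the caller-provided `data` array; on return, it holds the number of events actually written to `data`.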

9

10

```cpp { .api }

11

amdsmi_status_t amdsmi_get_gpu_event_notification(int timeout_ms,

12

uint32_t* num_elem,

13

amdsmi_evt_notification_data_t* data);

14

```

15

16

**Event Notification Data Structure:**

17

```cpp { .api }

18

typedef struct {

19

amdsmi_processor_handle processor_handle; // GPU that generated the event

20

amdsmi_evt_notification_type_t event; // Event type

21

char message[AMDSMI_MAX_STRING_LENGTH]; // Event message

22

} amdsmi_evt_notification_data_t;

23

```

24

25

**Event Types:**

26

```cpp { .api }

27

typedef enum {

28

AMDSMI_EVT_NOTIF_VMFAULT, // VM fault event

29

AMDSMI_EVT_NOTIF_FIRST, // First event marker

30

AMDSMI_EVT_NOTIF_LAST, // Last event marker

31

AMDSMI_EVT_NOTIF_GPU_PRE_RESET, // GPU pre-reset notification

32

AMDSMI_EVT_NOTIF_GPU_POST_RESET // GPU post-reset notification

33

} amdsmi_evt_notification_type_t;

34

```

35

36

### Event Mask Configuration

37

38

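Configure which events to monitor by setting a per-processor notification mask. Each bit in the mask enables one event type. Event notification must first be initialized for the processor with `amdsmi_init_gpu_event_notification` before a mask can be set.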

39

40

```cpp { .api }

41

amdsmi_status_t amdsmi_set_gpu_event_notification_mask(amdsmi_processor_handle processor_handle,

42

uint64_t mask);

43

```

44

45

**Event Mask Bits:**

46

```cpp { .api }

47

#define AMDSMI_EVENT_MASK_FROM_INDEX(i) (1ULL << ((i) - AMDSMI_EVT_NOTIF_FIRST))

48

```

49

50

**Usage Example:**

51

```cpp

52

// Enable GPU reset notifications

53

uint64_t mask = AMDSMI_EVENT_MASK_FROM_INDEX(AMDSMI_EVT_NOTIF_GPU_PRE_RESET) |

54

AMDSMI_EVENT_MASK_FROM_INDEX(AMDSMI_EVT_NOTIF_GPU_POST_RESET);

55

56

amdsmi_status_t status = amdsmi_set_gpu_event_notification_mask(gpu_handle, mask);

57

58

if (status == AMDSMI_STATUS_SUCCESS) {

59

printf("Event notifications enabled for GPU resets\n");

60

61

// Monitor for events with 5 second timeout

62

amdsmi_evt_notification_data_t events[10];

63

uint32_t num_events = 10;

64

65

status = amdsmi_get_gpu_event_notification(5000, &num_events, events);

66

67

if (status == AMDSMI_STATUS_SUCCESS) {

68

for (uint32_t i = 0; i < num_events; i++) {

69

printf("Event: %d, Message: %s\n",

70

events[i].event, events[i].message);

71

}

72

} else if (status == AMDSMI_STATUS_INTERRUPT) {

73

printf("Event monitoring timed out\n");

74

}

75

}

76

```

77

78

## Language Interface Examples

79

80

### Python

81

```python

82

import amdsmi

83

import time

84

85

gpu_handles = amdsmi.amdsmi_get_processor_handles(amdsmi.AmdSmiProcessorType.AMD_GPU)

86

if gpu_handles:

87

gpu_handle = gpu_handles[0]

88

89

try:

90

# Enable event notifications for GPU resets

91

reset_mask = (amdsmi.AmdSmiEventType.GPU_PRE_RESET |

92

amdsmi.AmdSmiEventType.GPU_POST_RESET)

93

amdsmi.amdsmi_set_gpu_event_notification_mask(gpu_handle, reset_mask)

94

95

print("Monitoring GPU events... (Press Ctrl+C to stop)")

96

while True:

97

try:

98

# Wait for events with 2 second timeout

99

events = amdsmi.amdsmi_get_gpu_event_notification(2000)

100

101

for event in events:

102

print(f"GPU Event: {event['event']}")

103

print(f"Message: {event['message']}")

104

print(f"GPU Handle: {event['processor_handle']}")

105

print("-" * 40)

106

107

except amdsmi.AmdSmiTimeoutException:

108

# Timeout is normal - continue monitoring

109

continue

110

111

except KeyboardInterrupt:

112

print("Event monitoring stopped")

113

except amdsmi.AmdSmiException as e:

114

print(f"Event monitoring error: {e}")

115

```

116

117

### Rust

118

```rust

119

use amdsmi::{set_gpu_event_notification_mask, get_gpu_event_notification};

120

use amdsmi::{EventNotificationType, EvtNotificationData};

121

use std::time::Duration;

122

123

// Set up event monitoring

124

let reset_events = EventNotificationType::GpuPreReset as u64 |

125

EventNotificationType::GpuPostReset as u64;

126

127

match set_gpu_event_notification_mask(gpu_handle, reset_events) {

128

Ok(_) => {

129

println!("Event monitoring enabled");

130

131

// Monitor events in a loop

132

loop {

133

match get_gpu_event_notification(Duration::from_secs(2)) {

134

Ok(events) => {

135

for event in events {

136

println!("GPU Event: {:?}", event.event);

137

println!("Message: {}", event.message);

138

println!("---");

139

}

140

},

141

Err(amdsmi::AmdsmiError::Timeout) => {

142

// Timeout is normal, continue monitoring

143

continue;

144

},

145

Err(e) => {

146

println!("Event monitoring error: {:?}", e);

147

break;

148

}

149

}

150

}

151

},

152

Err(e) => println!("Failed to enable event monitoring: {:?}", e),

153

}

154

```

155

156

## Event Monitoring Best Practices

157

158

1. **Timeout Handling**: Use appropriate timeouts to avoid blocking indefinitely

159

2. **Event Filtering**: Only enable notifications for events you need to handle

160

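3. **Resource Cleanup**: Properly disable event monitoring when shutting down by calling `amdsmi_stop_gpu_event_notification` for every processor on which notifications were enabled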

161

4. **Thread Safety**: Waiting for events blocks the calling thread for up to the timeout, so multi-threaded applications should run event monitoring on a dedicated thread

162

5. **Error Recovery**: Handle GPU reset events gracefully by reinitializing connections