about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- libsmctrl.c 12
1 files changed, 10 insertions, 2 deletions
diff --git a/libsmctrl.c b/libsmctrl.c
index 09fc627..5a45611 100644
--- a/libsmctrl.c
+++ b/libsmctrl.c
@@ -1,5 +1,5 @@
1/** 1/**
2 * Copyright 2022-2024 Joshua Bakita 2 * Copyright 2022-2025 Joshua Bakita
3 * Library to control SM masks on CUDA launches. Co-opts preexisting debug 3 * Library to control SM masks on CUDA launches. Co-opts preexisting debug
4 * logic in the CUDA driver library, and thus requires a build with -lcuda. 4 * logic in the CUDA driver library, and thus requires a build with -lcuda.
5 * 5 *
@@ -10,7 +10,7 @@
10 * +-----------+---------------+---------------+--------------+ 10 * +-----------+---------------+---------------+--------------+
11 * | Version | Global Mask | Stream Mask | Next Mask | 11 * | Version | Global Mask | Stream Mask | Next Mask |
12 * +-----------+---------------+---------------+--------------+ 12 * +-----------+---------------+---------------+--------------+
13 * | 8.0-12.6 | TMD/QMD Hook | stream struct | TMD/QMD Hook | 13 * | 8.0-12.8 | TMD/QMD Hook | stream struct | TMD/QMD Hook |
14 * | 6.5-7.5 | TMD/QMD Hook | N/A | TMD/QMD Hook | 14 * | 6.5-7.5 | TMD/QMD Hook | N/A | TMD/QMD Hook |
15 * +-----------+---------------+---------------+--------------+ 15 * +-----------+---------------+---------------+--------------+
16 * "N/A" indicates that a mask type is unsupported on that CUDA version. 16 * "N/A" indicates that a mask type is unsupported on that CUDA version.
@@ -204,6 +204,10 @@ void libsmctrl_set_next_mask(uint64_t mask) {
204// CUDA 12.5 and 12.6 use the same offset 204// CUDA 12.5 and 12.6 use the same offset
205// 12.5 tested on 555.58.02 205// 12.5 tested on 555.58.02
206// 12.6 tested on 560.35.03 206// 12.6 tested on 560.35.03
207#define CU_12_7_MASK_OFF 0x4fc
208// CUDA 12.7 and 12.8 use the same offset
209// 12.7 tested on 565.77
210// 12.8 tested on 570.124.06
207 211
208// Offsets for the stream struct on Jetson aarch64 212// Offsets for the stream struct on Jetson aarch64
209#define CU_9_0_MASK_OFF_JETSON 0x128 213#define CU_9_0_MASK_OFF_JETSON 0x128
@@ -334,6 +338,10 @@ void libsmctrl_set_stream_mask_ext(void* stream, uint128_t mask) {
334 case 12060: 338 case 12060:
335 hw_mask_v2 = (void*)(stream_struct_base + CU_12_5_MASK_OFF); 339 hw_mask_v2 = (void*)(stream_struct_base + CU_12_5_MASK_OFF);
336 break; 340 break;
341 case 12070:
342 case 12080:
343 hw_mask_v2 = (void*)(stream_struct_base + CU_12_7_MASK_OFF);
344 break;
337#elif __aarch64__ 345#elif __aarch64__
338 case 9000: { 346 case 9000: {
339 // Jetson TX2 offset is slightly different on CUDA 9.0. 347 // Jetson TX2 offset is slightly different on CUDA 9.0.