diff options
author | Joshua Bakita <bakitajoshua@gmail.com> | 2025-04-07 16:45:50 -0400 |
---|---|---|
committer | Joshua Bakita <bakitajoshua@gmail.com> | 2025-04-07 16:45:50 -0400 |
commit | 72ba87e277572eddb25784563faa3eac111c9556 (patch) | |
tree | 875d04ab41f8dd8d6377527c4242381e203ee317 | |
parent | 39c57bca3cbb42b1939a28377d8ef6cfab872450 (diff) |
-rw-r--r-- | libsmctrl.c | 12 |
1 file changed, 10 insertions, 2 deletions
diff --git a/libsmctrl.c b/libsmctrl.c index 09fc627..5a45611 100644 --- a/libsmctrl.c +++ b/libsmctrl.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /** | 1 | /** |
2 | * Copyright 2022-2024 Joshua Bakita | 2 | * Copyright 2022-2025 Joshua Bakita |
3 | * Library to control SM masks on CUDA launches. Co-opts preexisting debug | 3 | * Library to control SM masks on CUDA launches. Co-opts preexisting debug |
4 | * logic in the CUDA driver library, and thus requires a build with -lcuda. | 4 | * logic in the CUDA driver library, and thus requires a build with -lcuda. |
5 | * | 5 | * |
@@ -10,7 +10,7 @@ | |||
10 | * +-----------+---------------+---------------+--------------+ | 10 | * +-----------+---------------+---------------+--------------+ |
11 | * | Version | Global Mask | Stream Mask | Next Mask | | 11 | * | Version | Global Mask | Stream Mask | Next Mask | |
12 | * +-----------+---------------+---------------+--------------+ | 12 | * +-----------+---------------+---------------+--------------+ |
13 | * | 8.0-12.6 | TMD/QMD Hook | stream struct | TMD/QMD Hook | | 13 | * | 8.0-12.8 | TMD/QMD Hook | stream struct | TMD/QMD Hook | |
14 | * | 6.5-7.5 | TMD/QMD Hook | N/A | TMD/QMD Hook | | 14 | * | 6.5-7.5 | TMD/QMD Hook | N/A | TMD/QMD Hook | |
15 | * +-----------+---------------+---------------+--------------+ | 15 | * +-----------+---------------+---------------+--------------+ |
16 | * "N/A" indicates that a mask type is unsupported on that CUDA version. | 16 | * "N/A" indicates that a mask type is unsupported on that CUDA version. |
@@ -204,6 +204,10 @@ void libsmctrl_set_next_mask(uint64_t mask) { | |||
204 | // CUDA 12.5 and 12.6 use the same offset | 204 | // CUDA 12.5 and 12.6 use the same offset |
205 | // 12.5 tested on 555.58.02 | 205 | // 12.5 tested on 555.58.02 |
206 | // 12.6 tested on 560.35.03 | 206 | // 12.6 tested on 560.35.03 |
207 | #define CU_12_7_MASK_OFF 0x4fc | ||
208 | // CUDA 12.7 and 12.8 use the same offset | ||
209 | // 12.7 tested on 565.77 | ||
210 | // 12.8 tested on 570.124.06 | ||
207 | 211 | ||
208 | // Offsets for the stream struct on Jetson aarch64 | 212 | // Offsets for the stream struct on Jetson aarch64 |
209 | #define CU_9_0_MASK_OFF_JETSON 0x128 | 213 | #define CU_9_0_MASK_OFF_JETSON 0x128 |
@@ -334,6 +338,10 @@ void libsmctrl_set_stream_mask_ext(void* stream, uint128_t mask) { | |||
334 | case 12060: | 338 | case 12060: |
335 | hw_mask_v2 = (void*)(stream_struct_base + CU_12_5_MASK_OFF); | 339 | hw_mask_v2 = (void*)(stream_struct_base + CU_12_5_MASK_OFF); |
336 | break; | 340 | break; |
341 | case 12070: | ||
342 | case 12080: | ||
343 | hw_mask_v2 = (void*)(stream_struct_base + CU_12_7_MASK_OFF); | ||
344 | break; | ||
337 | #elif __aarch64__ | 345 | #elif __aarch64__ |
338 | case 9000: { | 346 | case 9000: { |
339 | // Jetson TX2 offset is slightly different on CUDA 9.0. | 347 | // Jetson TX2 offset is slightly different on CUDA 9.0. |