diff options
| author | Joshua Bakita <bakitajoshua@gmail.com> | 2025-04-07 16:45:50 -0400 |
|---|---|---|
| committer | Joshua Bakita <bakitajoshua@gmail.com> | 2025-04-07 16:45:50 -0400 |
| commit | 72ba87e277572eddb25784563faa3eac111c9556 (patch) | |
| tree | 875d04ab41f8dd8d6377527c4242381e203ee317 | |
| parent | 39c57bca3cbb42b1939a28377d8ef6cfab872450 (diff) | |
Support stream masking on CUDA 12.7 (x86) and 12.8 (x86)
| -rw-r--r-- | libsmctrl.c | 12 |
1 file changed, 10 insertions, 2 deletions
diff --git a/libsmctrl.c b/libsmctrl.c
index 09fc627..5a45611 100644
--- a/libsmctrl.c
+++ b/libsmctrl.c
| @@ -1,5 +1,5 @@ | |||
| 1 | /** | 1 | /** |
| 2 | * Copyright 2022-2024 Joshua Bakita | 2 | * Copyright 2022-2025 Joshua Bakita |
| 3 | * Library to control SM masks on CUDA launches. Co-opts preexisting debug | 3 | * Library to control SM masks on CUDA launches. Co-opts preexisting debug |
| 4 | * logic in the CUDA driver library, and thus requires a build with -lcuda. | 4 | * logic in the CUDA driver library, and thus requires a build with -lcuda. |
| 5 | * | 5 | * |
| @@ -10,7 +10,7 @@ | |||
| 10 | * +-----------+---------------+---------------+--------------+ | 10 | * +-----------+---------------+---------------+--------------+ |
| 11 | * | Version | Global Mask | Stream Mask | Next Mask | | 11 | * | Version | Global Mask | Stream Mask | Next Mask | |
| 12 | * +-----------+---------------+---------------+--------------+ | 12 | * +-----------+---------------+---------------+--------------+ |
| 13 | * | 8.0-12.6 | TMD/QMD Hook | stream struct | TMD/QMD Hook | | 13 | * | 8.0-12.8 | TMD/QMD Hook | stream struct | TMD/QMD Hook | |
| 14 | * | 6.5-7.5 | TMD/QMD Hook | N/A | TMD/QMD Hook | | 14 | * | 6.5-7.5 | TMD/QMD Hook | N/A | TMD/QMD Hook | |
| 15 | * +-----------+---------------+---------------+--------------+ | 15 | * +-----------+---------------+---------------+--------------+ |
| 16 | * "N/A" indicates that a mask type is unsupported on that CUDA version. | 16 | * "N/A" indicates that a mask type is unsupported on that CUDA version. |
| @@ -204,6 +204,10 @@ void libsmctrl_set_next_mask(uint64_t mask) { | |||
| 204 | // CUDA 12.5 and 12.6 use the same offset | 204 | // CUDA 12.5 and 12.6 use the same offset |
| 205 | // 12.5 tested on 555.58.02 | 205 | // 12.5 tested on 555.58.02 |
| 206 | // 12.6 tested on 560.35.03 | 206 | // 12.6 tested on 560.35.03 |
| 207 | #define CU_12_7_MASK_OFF 0x4fc | ||
| 208 | // CUDA 12.7 and 12.8 use the same offset | ||
| 209 | // 12.7 tested on 565.77 | ||
| 210 | // 12.8 tested on 570.124.06 | ||
| 207 | 211 | ||
| 208 | // Offsets for the stream struct on Jetson aarch64 | 212 | // Offsets for the stream struct on Jetson aarch64 |
| 209 | #define CU_9_0_MASK_OFF_JETSON 0x128 | 213 | #define CU_9_0_MASK_OFF_JETSON 0x128 |
| @@ -334,6 +338,10 @@ void libsmctrl_set_stream_mask_ext(void* stream, uint128_t mask) { | |||
| 334 | case 12060: | 338 | case 12060: |
| 335 | hw_mask_v2 = (void*)(stream_struct_base + CU_12_5_MASK_OFF); | 339 | hw_mask_v2 = (void*)(stream_struct_base + CU_12_5_MASK_OFF); |
| 336 | break; | 340 | break; |
| 341 | case 12070: | ||
| 342 | case 12080: | ||
| 343 | hw_mask_v2 = (void*)(stream_struct_base + CU_12_7_MASK_OFF); | ||
| 344 | break; | ||
| 337 | #elif __aarch64__ | 345 | #elif __aarch64__ |
| 338 | case 9000: { | 346 | case 9000: { |
| 339 | // Jetson TX2 offset is slightly different on CUDA 9.0. | 347 | // Jetson TX2 offset is slightly different on CUDA 9.0. |
