From 72ba87e277572eddb25784563faa3eac111c9556 Mon Sep 17 00:00:00 2001
From: Joshua Bakita
Date: Mon, 7 Apr 2025 16:45:50 -0400
Subject: Support stream masking on CUDA 12.7 (x86) and 12.8 (x86)

---
 libsmctrl.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/libsmctrl.c b/libsmctrl.c
index 09fc627..5a45611 100644
--- a/libsmctrl.c
+++ b/libsmctrl.c
@@ -1,5 +1,5 @@
 /**
- * Copyright 2022-2024 Joshua Bakita
+ * Copyright 2022-2025 Joshua Bakita
  * Library to control SM masks on CUDA launches. Co-opts preexisting debug
  * logic in the CUDA driver library, and thus requires a build with -lcuda.
  *
@@ -10,7 +10,7 @@
  * +-----------+---------------+---------------+--------------+
  * | Version   | Global Mask   | Stream Mask   | Next Mask    |
  * +-----------+---------------+---------------+--------------+
- * | 8.0-12.6  | TMD/QMD Hook  | stream struct | TMD/QMD Hook |
+ * | 8.0-12.8  | TMD/QMD Hook  | stream struct | TMD/QMD Hook |
  * | 6.5-7.5   | TMD/QMD Hook  | N/A           | TMD/QMD Hook |
  * +-----------+---------------+---------------+--------------+
  * "N/A" indicates that a mask type is unsupported on that CUDA version.
@@ -204,6 +204,10 @@ void libsmctrl_set_next_mask(uint64_t mask) {
 // CUDA 12.5 and 12.6 use the same offset
 // 12.5 tested on 555.58.02
 // 12.6 tested on 560.35.03
+#define CU_12_7_MASK_OFF 0x4fc
+// CUDA 12.7 and 12.8 use the same offset
+// 12.7 tested on 565.77
+// 12.8 tested on 570.124.06
 
 // Offsets for the stream struct on Jetson aarch64
 #define CU_9_0_MASK_OFF_JETSON 0x128
@@ -334,6 +338,10 @@ void libsmctrl_set_stream_mask_ext(void* stream, uint128_t mask) {
 	case 12060:
 		hw_mask_v2 = (void*)(stream_struct_base + CU_12_5_MASK_OFF);
 		break;
+	case 12070:
+	case 12080:
+		hw_mask_v2 = (void*)(stream_struct_base + CU_12_7_MASK_OFF);
+		break;
 #elif __aarch64__
 	case 9000: {
 		// Jetson TX2 offset is slightly different on CUDA 9.0.
-- 
cgit v1.2.2