Abort process on error, and better document callback-based masking

author: Joshua Bakita <bakitajoshua@gmail.com> 2023-11-29 18:05:01 -0500
committer: Joshua Bakita <bakitajoshua@gmail.com> 2023-11-29 18:24:25 -0500
commit: 973b919cfe6d05fdb3b82f538b1afbc3233a7008 (patch)
tree: 0964c9a9d94345dce6def66c1812e0204374b1ef
parent: 8062646a185baa6d3934d1e19743ac671e943fa8 (diff)
1 files changed, 25 insertions, 17 deletions
diff --git a/libsmctrl.c b/libsmctrl.c
index 526331f..817cb5d 100644
--- a/libsmctrl.c
+++ b/libsmctrl.c
@@ -156,19 +156,29 @@ static uint64_t g_sm_mask = 0;
 static __thread uint64_t g_next_sm_mask = 0;
 static char sm_control_setup_called = 0;
 static void launchCallback(void *ukwn, int domain, int cbid, const void *in_params) {
-        if (*(uint32_t*)in_params < 0x50) {
+        // The third 8-byte element in `in_params` is a pointer to the stream struct.
-                fprintf(stderr, "Unsupported CUDA version for callback-based SM masking. Aborting...\n");
+        // This exists even when `*(uint32_t*)in_params` < 0x50. This could be used to implement
-                return;
+        // stream masking without the manual offsets specified elsewhere (store a
-        }
+        // table of stream pointers to masks and do a lookup here).
-        if (!**((uintptr_t***)in_params+8)) {
+        // It could also be used (although not as easily) to support global and next
-                fprintf(stderr, "Called with NULL halLaunchDataAllocation\n");
+        // masking on old CUDA versions, but that would require hooking earlier in the
-                return;
+        // launch process (before the stream mask is applied).
-        }
+        if (*(uint32_t*)in_params < 0x50)
-        //fprintf(stderr, "cta: %lx\n", *(uint64_t*)(**((char***)in_params + 8) + 74));
+                abort(1, 0, "Unsupported CUDA version for callback-based SM masking. Aborting...");
+        // The eighth 8-byte element in `in_params` is a pointer to a struct which
+        // contains a pointer to the TMD as its first element. Note that this eighth
+        // pointer must exist---it only exists when the first 8-byte element of
+        // `in_params` is at least 0x50 (checked above).
+        void* tmd = **((uintptr_t***)in_params + 8);
+        if (!tmd)
+                abort(1, 0, "TMD allocation appears NULL; likely forward-compatibilty issue.\n");
+        //fprintf(stderr, "cta: %lx\n", *(uint64_t*)(tmd + 74));
        // TODO: Check for supported QMD version (>XXX, <4.00)
-        // TODO: Support QMD version 4 (Hopper), where offset starts at +304 (rather than +84) and is 72 bytes (rather than 8 bytes) wide
+        // TODO: Support QMD version 4 (Hopper), where offset starts at +304 (rather than +84) and is 16 bytes (rather than 8 bytes) wide. It also requires an enable bit at +31bits.
-        uint32_t *lower_ptr = (uint32_t*)(**((char***)in_params + 8) + 84);
+        uint32_t *lower_ptr = tmd + 84;
-        uint32_t *upper_ptr = (uint32_t*)(**((char***)in_params + 8) + 88);
+        uint32_t *upper_ptr = tmd + 88;
        if (g_next_sm_mask) {
                *lower_ptr = (uint32_t)g_next_sm_mask;
                *upper_ptr = (uint32_t)(g_next_sm_mask >> 32);
@@ -198,13 +208,11 @@ static void setup_sm_control_11() {
        enable = (typeof(enable))enable_func_addr;
        int res = 0;
        res = subscribe(&my_hndl, launchCallback, NULL);
-        if (res) {
+        if (res)
-                fprintf(stderr, "libsmctrl: Error subscribing to launch callback. Error %d\n", res);
+                abort(1, 0, "Error subscribing to launch callback. CUDA returned error code %d.", res);
-                return;
-        }
        res = enable(1, my_hndl, LAUNCH_DOMAIN, LAUNCH_PRE_UPLOAD);
        if (res)
-                fprintf(stderr, "libsmctrl: Error enabling launch callback. Error %d\n", res);
+                abort(1, 0, "Error enabling launch callback. CUDA returned error code %d.", res);
 }
 // Set default mask for all launches
author	Joshua Bakita <bakitajoshua@gmail.com>	2023-11-29 18:05:01 -0500
committer	Joshua Bakita <bakitajoshua@gmail.com>	2023-11-29 18:24:25 -0500
commit	973b919cfe6d05fdb3b82f538b1afbc3233a7008 (patch)
tree	0964c9a9d94345dce6def66c1812e0204374b1ef
parent	8062646a185baa6d3934d1e19743ac671e943fa8 (diff)