De-duplicate CUDA version checks and omit when building with CUDA > 6.5 [HEAD, master]

Code built with CUDA > 6.5 cannot run on CUDA 6.5 or older, so the check added unnecessary overhead. Tested with CUDA 6.5 and CUDA 10.2 to confirm that the correct code is generated, and global and next masking were tested to work on a GTX 1060 3 GB with either build while using CUDA 10.2 at runtime.
author: Joshua Bakita <bakitajoshua@gmail.com> 2025-05-05 03:13:30 -0400
committer: Joshua Bakita <bakitajoshua@gmail.com> 2025-05-05 03:13:30 -0400
commit: c250928930cb5c95bffc878913301f9a5d4efcb7 (patch)
tree: 8de8e20d727b69c5fa6228ce35daa8b1fbf96c12
parent: 72ba87e277572eddb25784563faa3eac111c9556 (diff)
1 files changed, 16 insertions, 21 deletions
diff --git a/libsmctrl.c b/libsmctrl.c
index 5a45611..5ee94fb 100644
--- a/libsmctrl.c
+++ b/libsmctrl.c
@@ -119,10 +119,23 @@ static void setup_sm_control_callback() {
        int (*enable)(uint32_t enable, uint32_t hndl, int domain, int cbid);
        uintptr_t* tbl_base;
        uint32_t my_hndl;
-        // Avoid race conditions (setup can only be called once)
+        // Avoid race conditions (setup should only run once)
        if (__atomic_test_and_set(&sm_control_setup_called, __ATOMIC_SEQ_CST))
                return;
+#if CUDA_VERSION <= 6050
+        // Verify supported CUDA version
+        // It's impossible for us to run with a version of CUDA older than we were
+        // built by, so this check is excluded if built with CUDA > 6.5.
+        int ver = 0;
+        cuDriverGetVersion(&ver);
+        if (ver < 6050)
+                abort(1, ENOSYS, "Global or next masking requires at least CUDA 6.5; "
+                                 "this application is using CUDA %d.%d",
+                                 ver / 1000, (ver % 100));
+#endif
+        // Set up callback
        cuGetExportTable((const void**)&tbl_base, &callback_funcs_id);
        uintptr_t subscribe_func_addr = *(tbl_base + 3);
        uintptr_t enable_func_addr = *(tbl_base + 6);
@@ -139,31 +152,13 @@ static void setup_sm_control_callback() {
 // Set default mask for all launches
 void libsmctrl_set_global_mask(uint64_t mask) {
-        if (!sm_control_setup_called) {
+        setup_sm_control_callback();
-                // The version will not change while running, so only check once
-                int ver = 0;
-                cuDriverGetVersion(&ver);
-                if (ver < 6050)
-                        abort(1, ENOSYS, "Global masking requires at least CUDA 6.5; "
-                                         "this application is using CUDA %d.%d",
-                                         ver / 1000, (ver % 100));
-                setup_sm_control_callback();
-        }
        g_sm_mask = mask;
 }
 // Set mask for next launch from this thread
 void libsmctrl_set_next_mask(uint64_t mask) {
-        if (!sm_control_setup_called) {
+        setup_sm_control_callback();
-                // The version will not change while running, so only check once
-                int ver = 0;
-                cuDriverGetVersion(&ver);
-                if (ver < 6050)
-                        abort(1, ENOSYS, "Next masking requires at least CUDA 6.5; "
-                                         "this application is using CUDA %d.%d",
-                                         ver / 1000, (ver % 100));
-                setup_sm_control_callback();
-        }
        g_next_sm_mask = mask;
 }
author	Joshua Bakita <bakitajoshua@gmail.com>	2025-05-05 03:13:30 -0400
committer	Joshua Bakita <bakitajoshua@gmail.com>	2025-05-05 03:13:30 -0400
commit	c250928930cb5c95bffc878913301f9a5d4efcb7 (patch)
tree	8de8e20d727b69c5fa6228ce35daa8b1fbf96c12
parent	72ba87e277572eddb25784563faa3eac111c9556 (diff)