From c250928930cb5c95bffc878913301f9a5d4efcb7 Mon Sep 17 00:00:00 2001 From: Joshua Bakita Date: Mon, 5 May 2025 03:13:30 -0400 Subject: De-duplicate CUDA version checks and omit when building with CUDA > 6.5 Code built with CUDA > 6.5 cannot run on CUDA 6.5 or older, so the check added unnecessary overhead. Verified that builds with CUDA 6.5 and CUDA 10.2 generate the correct code, and that global and next masking work on a GTX 1060 3 GB with either build while using CUDA 10.2 at runtime. --- libsmctrl.c | 37 ++++++++++++++++--------------------- 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/libsmctrl.c b/libsmctrl.c index 5a45611..5ee94fb 100644 --- a/libsmctrl.c +++ b/libsmctrl.c @@ -119,10 +119,23 @@ static void setup_sm_control_callback() { int (*enable)(uint32_t enable, uint32_t hndl, int domain, int cbid); uintptr_t* tbl_base; uint32_t my_hndl; - // Avoid race conditions (setup can only be called once) + // Avoid race conditions (setup should only run once) if (__atomic_test_and_set(&sm_control_setup_called, __ATOMIC_SEQ_CST)) return; +#if CUDA_VERSION <= 6050 + // Verify supported CUDA version + // It's impossible for us to run with a version of CUDA older than we were + // built by, so this check is excluded if built with CUDA > 6.5. 
+ int ver = 0; + cuDriverGetVersion(&ver); + if (ver < 6050) + abort(1, ENOSYS, "Global or next masking requires at least CUDA 6.5; " + "this application is using CUDA %d.%d", + ver / 1000, (ver % 100)); +#endif + + // Set up callback cuGetExportTable((const void**)&tbl_base, &callback_funcs_id); uintptr_t subscribe_func_addr = *(tbl_base + 3); uintptr_t enable_func_addr = *(tbl_base + 6); @@ -139,31 +152,13 @@ static void setup_sm_control_callback() { // Set default mask for all launches void libsmctrl_set_global_mask(uint64_t mask) { - if (!sm_control_setup_called) { - // The version will not change while running, so only check once - int ver = 0; - cuDriverGetVersion(&ver); - if (ver < 6050) - abort(1, ENOSYS, "Global masking requires at least CUDA 6.5; " - "this application is using CUDA %d.%d", - ver / 1000, (ver % 100)); - setup_sm_control_callback(); - } + setup_sm_control_callback(); g_sm_mask = mask; } // Set mask for next launch from this thread void libsmctrl_set_next_mask(uint64_t mask) { - if (!sm_control_setup_called) { - // The version will not change while running, so only check once - int ver = 0; - cuDriverGetVersion(&ver); - if (ver < 6050) - abort(1, ENOSYS, "Next masking requires at least CUDA 6.5; " - "this application is using CUDA %d.%d", - ver / 1000, (ver % 100)); - setup_sm_control_callback(); - } + setup_sm_control_callback(); g_next_sm_mask = mask; } -- cgit v1.2.2