aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJoshua Bakita <bakitajoshua@gmail.com>2025-05-05 03:13:30 -0400
committerJoshua Bakita <bakitajoshua@gmail.com>2025-05-05 03:13:30 -0400
commitc250928930cb5c95bffc878913301f9a5d4efcb7 (patch)
tree8de8e20d727b69c5fa6228ce35daa8b1fbf96c12
parent72ba87e277572eddb25784563faa3eac111c9556 (diff)
De-duplicate CUDA version checks and omit when building with CUDA > 6.5 [HEAD, master]
Code built with CUDA > 6.5 cannot run on CUDA 6.5 or older, so the check added unnecessary overhead. Tested with CUDA 6.5 and CUDA 10.2 to confirm that the correct code is generated, and tested global and next masking to work on a GTX 1060 3 GB with either build while using CUDA 10.2 at runtime.
-rw-r--r--libsmctrl.c37
1 files changed, 16 insertions, 21 deletions
diff --git a/libsmctrl.c b/libsmctrl.c
index 5a45611..5ee94fb 100644
--- a/libsmctrl.c
+++ b/libsmctrl.c
@@ -119,10 +119,23 @@ static void setup_sm_control_callback() {
119 int (*enable)(uint32_t enable, uint32_t hndl, int domain, int cbid); 119 int (*enable)(uint32_t enable, uint32_t hndl, int domain, int cbid);
120 uintptr_t* tbl_base; 120 uintptr_t* tbl_base;
121 uint32_t my_hndl; 121 uint32_t my_hndl;
122 // Avoid race conditions (setup can only be called once) 122 // Avoid race conditions (setup should only run once)
123 if (__atomic_test_and_set(&sm_control_setup_called, __ATOMIC_SEQ_CST)) 123 if (__atomic_test_and_set(&sm_control_setup_called, __ATOMIC_SEQ_CST))
124 return; 124 return;
125 125
126#if CUDA_VERSION <= 6050
127 // Verify supported CUDA version
128 // It's impossible for us to run with a version of CUDA older than we were
129 // built by, so this check is excluded if built with CUDA > 6.5.
130 int ver = 0;
131 cuDriverGetVersion(&ver);
132 if (ver < 6050)
133 abort(1, ENOSYS, "Global or next masking requires at least CUDA 6.5; "
134 "this application is using CUDA %d.%d",
135 ver / 1000, (ver % 100));
136#endif
137
138 // Set up callback
126 cuGetExportTable((const void**)&tbl_base, &callback_funcs_id); 139 cuGetExportTable((const void**)&tbl_base, &callback_funcs_id);
127 uintptr_t subscribe_func_addr = *(tbl_base + 3); 140 uintptr_t subscribe_func_addr = *(tbl_base + 3);
128 uintptr_t enable_func_addr = *(tbl_base + 6); 141 uintptr_t enable_func_addr = *(tbl_base + 6);
@@ -139,31 +152,13 @@ static void setup_sm_control_callback() {
139 152
140// Set default mask for all launches 153// Set default mask for all launches
141void libsmctrl_set_global_mask(uint64_t mask) { 154void libsmctrl_set_global_mask(uint64_t mask) {
142 if (!sm_control_setup_called) { 155 setup_sm_control_callback();
143 // The version will not change while running, so only check once
144 int ver = 0;
145 cuDriverGetVersion(&ver);
146 if (ver < 6050)
147 abort(1, ENOSYS, "Global masking requires at least CUDA 6.5; "
148 "this application is using CUDA %d.%d",
149 ver / 1000, (ver % 100));
150 setup_sm_control_callback();
151 }
152 g_sm_mask = mask; 156 g_sm_mask = mask;
153} 157}
154 158
155// Set mask for next launch from this thread 159// Set mask for next launch from this thread
156void libsmctrl_set_next_mask(uint64_t mask) { 160void libsmctrl_set_next_mask(uint64_t mask) {
157 if (!sm_control_setup_called) { 161 setup_sm_control_callback();
158 // The version will not change while running, so only check once
159 int ver = 0;
160 cuDriverGetVersion(&ver);
161 if (ver < 6050)
162 abort(1, ENOSYS, "Next masking requires at least CUDA 6.5; "
163 "this application is using CUDA %d.%d",
164 ver / 1000, (ver % 100));
165 setup_sm_control_callback();
166 }
167 g_next_sm_mask = mask; 162 g_next_sm_mask = mask;
168} 163}
169 164