diff options
author | Joshua Bakita <bakitajoshua@gmail.com> | 2025-05-05 03:13:30 -0400 |
---|---|---|
committer | Joshua Bakita <bakitajoshua@gmail.com> | 2025-05-05 03:13:30 -0400 |
commit | c250928930cb5c95bffc878913301f9a5d4efcb7 (patch) | |
tree | 8de8e20d727b69c5fa6228ce35daa8b1fbf96c12 | |
parent | 72ba87e277572eddb25784563faa3eac111c9556 (diff) |
Code built with CUDA > 6.5 cannot run on CUDA 6.5 or older, so the
check added unnecessary overhead.
Verified that building with CUDA 6.5 and with CUDA 10.2 each generates
the correct code, and that global and next masking both work on a
GTX 1060 3 GB with either build while using CUDA 10.2 at runtime.
-rw-r--r-- | libsmctrl.c | 37 |
1 files changed, 16 insertions, 21 deletions
diff --git a/libsmctrl.c b/libsmctrl.c index 5a45611..5ee94fb 100644 --- a/libsmctrl.c +++ b/libsmctrl.c | |||
@@ -119,10 +119,23 @@ static void setup_sm_control_callback() { | |||
119 | int (*enable)(uint32_t enable, uint32_t hndl, int domain, int cbid); | 119 | int (*enable)(uint32_t enable, uint32_t hndl, int domain, int cbid); |
120 | uintptr_t* tbl_base; | 120 | uintptr_t* tbl_base; |
121 | uint32_t my_hndl; | 121 | uint32_t my_hndl; |
122 | // Avoid race conditions (setup can only be called once) | 122 | // Avoid race conditions (setup should only run once) |
123 | if (__atomic_test_and_set(&sm_control_setup_called, __ATOMIC_SEQ_CST)) | 123 | if (__atomic_test_and_set(&sm_control_setup_called, __ATOMIC_SEQ_CST)) |
124 | return; | 124 | return; |
125 | 125 | ||
126 | #if CUDA_VERSION <= 6050 | ||
127 | // Verify supported CUDA version | ||
128 | // It's impossible for us to run with a version of CUDA older than we were | ||
129 | // built by, so this check is excluded if built with CUDA > 6.5. | ||
130 | int ver = 0; | ||
131 | cuDriverGetVersion(&ver); | ||
132 | if (ver < 6050) | ||
133 | abort(1, ENOSYS, "Global or next masking requires at least CUDA 6.5; " | ||
134 | "this application is using CUDA %d.%d", | ||
135 | ver / 1000, (ver % 100)); | ||
136 | #endif | ||
137 | |||
138 | // Set up callback | ||
126 | cuGetExportTable((const void**)&tbl_base, &callback_funcs_id); | 139 | cuGetExportTable((const void**)&tbl_base, &callback_funcs_id); |
127 | uintptr_t subscribe_func_addr = *(tbl_base + 3); | 140 | uintptr_t subscribe_func_addr = *(tbl_base + 3); |
128 | uintptr_t enable_func_addr = *(tbl_base + 6); | 141 | uintptr_t enable_func_addr = *(tbl_base + 6); |
@@ -139,31 +152,13 @@ static void setup_sm_control_callback() { | |||
139 | 152 | ||
140 | // Set default mask for all launches | 153 | // Set default mask for all launches |
141 | void libsmctrl_set_global_mask(uint64_t mask) { | 154 | void libsmctrl_set_global_mask(uint64_t mask) { |
142 | if (!sm_control_setup_called) { | 155 | setup_sm_control_callback(); |
143 | // The version will not change while running, so only check once | ||
144 | int ver = 0; | ||
145 | cuDriverGetVersion(&ver); | ||
146 | if (ver < 6050) | ||
147 | abort(1, ENOSYS, "Global masking requires at least CUDA 6.5; " | ||
148 | "this application is using CUDA %d.%d", | ||
149 | ver / 1000, (ver % 100)); | ||
150 | setup_sm_control_callback(); | ||
151 | } | ||
152 | g_sm_mask = mask; | 156 | g_sm_mask = mask; |
153 | } | 157 | } |
154 | 158 | ||
155 | // Set mask for next launch from this thread | 159 | // Set mask for next launch from this thread |
156 | void libsmctrl_set_next_mask(uint64_t mask) { | 160 | void libsmctrl_set_next_mask(uint64_t mask) { |
157 | if (!sm_control_setup_called) { | 161 | setup_sm_control_callback(); |
158 | // The version will not change while running, so only check once | ||
159 | int ver = 0; | ||
160 | cuDriverGetVersion(&ver); | ||
161 | if (ver < 6050) | ||
162 | abort(1, ENOSYS, "Next masking requires at least CUDA 6.5; " | ||
163 | "this application is using CUDA %d.%d", | ||
164 | ver / 1000, (ver % 100)); | ||
165 | setup_sm_control_callback(); | ||
166 | } | ||
167 | g_next_sm_mask = mask; | 162 | g_next_sm_mask = mask; |
168 | } | 163 | } |
169 | 164 | ||