-rw-r--r--   litmus/Kconfig        |   50
-rw-r--r--   litmus/nvidia_info.c  | 1525
2 files changed, 828 insertions, 747 deletions
diff --git a/litmus/Kconfig b/litmus/Kconfig
index 3d3a3ec71243..32c1c92cb56f 100644
--- a/litmus/Kconfig
+++ b/litmus/Kconfig
@@ -538,40 +538,54 @@ config NV_DEVICE_NUM
 
 choice
 	prompt "CUDA/Driver Version Support"
-	default CUDA_5_0
+	default NV_DRV_319_37
 	depends on LITMUS_NVIDIA
 	help
-	Select the version of CUDA/driver to support.
+	Select the version of NVIDIA driver to support.
+	Note: Some of the configurations below may work
+	with other versions of the NVIDIA driver, but
+	the layouts of data structures in litmus/nvidia_info.c
+	will have to be manually compared against
+	<driver>/kernel/nv.h and nv-linux.h in the driver's
+	GPL shim layer.
+
+config NV_DRV_331_13
+	bool "331.13 (post-CUDA 5.5)"
+	depends on LITMUS_NVIDIA && REALTIME_AUX_TASKS
+	help
+	NV Driver 331.13. (An updated driver released
+	after CUDA 5.5.)
 
-config CUDA_5_X
-	bool "CUDA 5.0+"
+config NV_DRV_325_15
+	bool "325.15 (post-CUDA 5.5)"
 	depends on LITMUS_NVIDIA && REALTIME_AUX_TASKS
 	help
-	Support CUDA 5.0 (dev. driver version: x86_64-310.x)
+	NV Driver 325.15. (An updated driver released
+	after CUDA 5.5.)
 
-config CUDA_5_0
-	bool "CUDA 5.0"
+config NV_DRV_319_37
+	bool "319.37 (CUDA 5.5)"
 	depends on LITMUS_NVIDIA && REALTIME_AUX_TASKS
 	help
-	Support CUDA 5.0 (dev. driver version: x86_64-304.54)
+	NV Driver 319.37. (distributed with CUDA 5.5)
 
-config CUDA_4_2
-	bool "CUDA 4.2"
-	depends on LITMUS_NVIDIA
+config NV_DRV_304_54
+	bool "304.54 (CUDA 5.0)"
+	depends on LITMUS_NVIDIA && REALTIME_AUX_TASKS
 	help
-	Support CUDA 4.2 (dev driver version: x86_64-295.40)
+	NV Driver 304.54. (distributed with CUDA 5.0)
 
-config CUDA_4_0
-	bool "CUDA 4.0"
+config NV_DRV_295_40
+	bool "295.40 (CUDA 4.2)"
 	depends on LITMUS_NVIDIA
 	help
-	Support CUDA 4.0 (dev. driver version: x86_64-270.41)
+	NV Driver 295.40. (distributed with CUDA 4.2)
 
-config CUDA_3_2
-	bool "CUDA 3.2"
+config NV_DRV_270_41
+	bool "270.41 (CUDA 4.0)"
 	depends on LITMUS_NVIDIA
 	help
-	Support CUDA 3.2 (dev. driver version: x86_64-260.24)
+	NV Driver 270.41. (distributed with CUDA 4.0)
 
 endchoice
 
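With the new choice, a build selects exactly one NV_DRV_* option. As a rough illustration only (assuming REALTIME_AUX_TASKS is already enabled), the resulting .config fragment for the default CUDA 5.5 driver would read:

# .config fragment (illustrative sketch, not from the patch)
CONFIG_LITMUS_NVIDIA=y
CONFIG_REALTIME_AUX_TASKS=y
CONFIG_NV_DRV_319_37=y

These CONFIG_* symbols drive the version macros added at the top of litmus/nvidia_info.c below.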
diff --git a/litmus/nvidia_info.c b/litmus/nvidia_info.c
index 0050ce65e521..6a101882e615 100644
--- a/litmus/nvidia_info.c
+++ b/litmus/nvidia_info.c
@@ -14,6 +14,39 @@
 #include <litmus/litmus_softirq.h>
 #endif
 
+#if defined(CONFIG_NV_DRV_331_13)
+#define NV_MAJOR_V 331
+#define NV_MINOR_V 13
+#elif defined(CONFIG_NV_DRV_325_15)
+#define NV_MAJOR_V 325
+#define NV_MINOR_V 15
+#elif defined(CONFIG_NV_DRV_319_37)
+#define NV_MAJOR_V 319
+#define NV_MINOR_V 37
+#elif defined(CONFIG_NV_DRV_304_54)
+#define NV_MAJOR_V 304
+#define NV_MINOR_V 54
+#elif defined(CONFIG_NV_DRV_295_40)
+#define NV_MAJOR_V 295
+#define NV_MINOR_V 40
+#elif defined(CONFIG_NV_DRV_270_41)
+#define NV_MAJOR_V 270
+#define NV_MINOR_V 41
+#else
+#error "Unsupported NV Driver"
+#endif
+
+#if NV_MAJOR_V >= 319
+#include <drm/drmP.h>
+#endif
+
+/* The following structures map to structures found in the GPL layer
+   of the NVIDIA-distributed binary blob driver. Much of the code
+   is cobbled together from various versions of the NV driver. We
+   can factor this out into a separate tool that gives memory offsets
+   to determine the device ID if distributing this code ever becomes
+   a problem. */
+
 typedef unsigned char NvV8;  /* "void": enumerated or multiple fields */
 typedef unsigned short NvV16; /* "void": enumerated or multiple fields */
 typedef unsigned char NvU8;  /* 0 to 255 */
@@ -25,6 +58,7 @@ typedef double NvF64; /* IEEE Double Precision (S1E11M52) */
 typedef unsigned int NvV32; /* "void": enumerated or multiple fields */
 typedef unsigned int NvU32; /* 0 to 4294967295 */
 typedef unsigned long long NvU64; /* 0 to 18446744073709551615 */
+typedef NvU8 NvBool;
 typedef union
 {
 	volatile NvV8 Reg008[1];
@@ -35,8 +69,8 @@ typedef union
 typedef struct
 {
 	NvU64 address;
-#if defined(CONFIG_CUDA_4_2) || defined(CONFIG_CUDA_5_0) || defined(CONFIG_CUDA_5_X)
+#if NV_MAJOR_V >= 295
 	NvU64 strapped_size;
 #endif
 	NvU64 size;
 	NvU32 offset;
@@ -44,40 +78,61 @@ typedef struct
 	litmus_nv_phwreg_t map_u;
 } litmus_nv_aperture_t;
 
+#if NV_MAJOR_V >= 331
+typedef struct
+{
+	NvU32 domain;
+	NvU8 bus;
+	NvU8 slot;
+	NvU16 vendor_id;
+	NvU16 device_id;
+	NvBool valid;
+} litmus_pci_info_t;
+#endif
+
 typedef struct
 {
 	void *priv;     /* private data */
 	void *os_state; /* os-specific device state */
 
-#ifdef CONFIG_CUDA_4_0
-	int rmInitialized; /* TODO: appears in CUDA 3_2? */
+#if NV_MAJOR_V == 270
+	int rmInitialized;
 #endif
 	int flags;
 
+#if NV_MAJOR_V <= 331
 	/* PCI config info */
 	NvU32 domain;
 	NvU16 bus;
 	NvU16 slot;
 	NvU16 vendor_id;
 	NvU16 device_id;
+#else
+	litmus_pci_info_t pci_info;
+#endif
+
 	NvU16 subsystem_id;
 	NvU32 gpu_id;
 	void *handle;
 
+#if NV_MAJOR_V < 325
 	NvU32 pci_cfg_space[16];
+#else
+	NvU32 pci_cfg_space[64];
+#endif
 
 	/* physical characteristics */
 	litmus_nv_aperture_t bars[3];
 	litmus_nv_aperture_t *regs;
 	litmus_nv_aperture_t *fb, ud;
 
-#ifndef CONFIG_CUDA_5_X
+#if NV_MAJOR_V < 325
 	litmus_nv_aperture_t agp;
 #endif
 
 	NvU32 interrupt_line;
 
-#ifndef CONFIG_CUDA_5_X
+#if NV_MAJOR_V < 325
 	NvU32 agp_config;
 	NvU32 agp_status;
 #endif
@@ -106,9 +161,9 @@ typedef struct litmus_nv_linux_state_s {
 	litmus_nv_state_t nv_state;
 	atomic_t usage_count;
 
 	struct pci_dev *dev;
 
-#ifndef CONFIG_CUDA_5_X
+#if NV_MAJOR_V < 325
 	void *agp_bridge;
 #endif
 
@@ -118,10 +173,7 @@ typedef struct litmus_nv_linux_state_s {
 	void *isr_sp;
 	void *pci_cfgchk_sp;
 	void *isr_bh_sp;
-
-#ifndef CONFIG_CUDA_3_2
 	char registry_keys[512];
-#endif
 
 	/* keep track of any pending bottom halves */
 	struct tasklet_struct tasklet;
@@ -136,8 +188,15 @@ typedef struct litmus_nv_linux_state_s {
 	/* lock for linux-specific alloc queue */
 	struct semaphore at_lock;
 
+	/* !!! This field is all that we're after to determine
+	   !!! the device number of the GPU that spawned a given
+	   vvv tasklet or workqueue item. */
 	NvU32 device_num;
 	struct litmus_nv_linux_state_s *next;
+
+#if NV_MAJOR_V >= 319
+	struct drm_device *drm;
+#endif
 } litmus_nv_linux_state_t;
 
 
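The annotated device_num field above is the whole point of mirroring these structures: given the nv_state pointer stashed in a tasklet, LITMUS walks back to the enclosing litmus_nv_linux_state_t with container_of. A self-contained userspace sketch of that idiom, using toy stand-in types rather than the real (version-dependent) driver layout:

/* container_of demo: recover the outer struct from a pointer to an
 * embedded member, as get_tasklet_nv_device_num() does further below.
 * Toy types; the real litmus_nv_linux_state_t layout is version-specific. */
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct toy_nv_state { int flags; };
struct toy_nv_linux_state {
	struct toy_nv_state nv_state;   /* embedded, as in the driver shim */
	unsigned int device_num;
};

int main(void)
{
	struct toy_nv_linux_state ls = { .device_num = 3 };
	struct toy_nv_state *nv = &ls.nv_state;   /* what tasklet->data points at */
	struct toy_nv_linux_state *outer =
		container_of(nv, struct toy_nv_linux_state, nv_state);
	printf("device_num = %u\n", outer->device_num);   /* prints 3 */
	return 0;
}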
@@ -145,85 +204,93 @@ typedef struct litmus_nv_linux_state_s {
 static void __attribute__((unused))
 dump_nvidia_info(const struct tasklet_struct *t)
 {
 	litmus_nv_state_t* nvstate = NULL;
 	litmus_nv_linux_state_t* linuxstate = NULL;
 	struct pci_dev* pci = NULL;
 
 	nvstate = (litmus_nv_state_t*)(t->data);
 
 	if(nvstate)
 	{
 		TRACE("NV State:\n"
 			"\ttasklet ptr = %p\n"
 			"\tstate ptr = %p\n"
 			"\tprivate data ptr = %p\n"
 			"\tos state ptr = %p\n"
 			"\tdomain = %u\n"
 			"\tbus = %u\n"
 			"\tslot = %u\n"
 			"\tvendor_id = %u\n"
 			"\tdevice_id = %u\n"
 			"\tsubsystem_id = %u\n"
 			"\tgpu_id = %u\n"
 			"\tinterrupt_line = %u\n",
 			t,
 			nvstate,
 			nvstate->priv,
 			nvstate->os_state,
-			nvstate->domain,
-			nvstate->bus,
-			nvstate->slot,
-			nvstate->vendor_id,
-			nvstate->device_id,
+#if NV_MAJOR_V <= 331
+			nvstate->domain,
+			nvstate->bus,
+			nvstate->slot,
+			nvstate->vendor_id,
+			nvstate->device_id,
+#else
+			nvstate->pci_info.domain,
+			nvstate->pci_info.bus,
+			nvstate->pci_info.slot,
+			nvstate->pci_info.vendor_id,
+			nvstate->pci_info.device_id,
+#endif
 			nvstate->subsystem_id,
 			nvstate->gpu_id,
 			nvstate->interrupt_line);
 
 		linuxstate = container_of(nvstate, litmus_nv_linux_state_t, nv_state);
 	}
 	else
 	{
 		TRACE("INVALID NVSTATE????\n");
 	}
 
 	if(linuxstate)
 	{
 		int ls_offset = (void*)(&(linuxstate->device_num)) - (void*)(linuxstate);
 		int ns_offset_raw = (void*)(&(linuxstate->device_num)) - (void*)(&(linuxstate->nv_state));
 		int ns_offset_desired = (void*)(&(linuxstate->device_num)) - (void*)(nvstate);
 
 		TRACE("LINUX NV State:\n"
 			"\tlinux nv state ptr: %p\n"
 			"\taddress of tasklet: %p\n"
 			"\taddress of work: %p\n"
 			"\tusage_count: %d\n"
 			"\tdevice_num: %u\n"
 			"\ttasklet addr == this tasklet: %d\n"
 			"\tpci: %p\n",
 			linuxstate,
 			&(linuxstate->tasklet),
 			&(linuxstate->work),
 			atomic_read(&(linuxstate->usage_count)),
 			linuxstate->device_num,
 			(t == &(linuxstate->tasklet)),
 			linuxstate->dev);
 
 		pci = linuxstate->dev;
 
 		TRACE("Offsets:\n"
 			"\tOffset from LinuxState: %d, %x\n"
 			"\tOffset from NVState: %d, %x\n"
 			"\tOffset from parameter: %d, %x\n"
 			"\tdevice_num: %u\n",
 			ls_offset, ls_offset,
 			ns_offset_raw, ns_offset_raw,
 			ns_offset_desired, ns_offset_desired,
 			*((u32*)((void*)nvstate + ns_offset_desired)));
 	}
 	else
 	{
 		TRACE("INVALID LINUXNVSTATE?????\n");
 	}
 }
 #endif
 
@@ -235,120 +302,120 @@ static int shutdown_nv_device_reg(void);
 void shutdown_nvidia_info(void);
 
 static int nvidia_going_module_notify(struct notifier_block *self,
 				unsigned long val, void *data)
 {
 	struct module *mod = data;
 
 	if (nvidia_mod && (mod == nvidia_mod)) {
 		switch (val) {
 		case MODULE_STATE_GOING:
 			/* just set our mod reference to null to avoid crash */
 			nvidia_mod = NULL;
 			mb();
 			break;
 		default:
 			break;
 		}
 	}
 
 	return 0;
 }
 
 static struct notifier_block nvidia_going = {
 	.notifier_call = nvidia_going_module_notify,
 	.priority = 1,
 };
 
 
 struct init_nvinfo_wq_data
 {
 	struct work_struct work;
 };
 
 static void __init_nvidia_info(struct work_struct *w)
 {
 	struct init_nvinfo_wq_data *work =
 		container_of(w, struct init_nvinfo_wq_data, work);
 	struct module* mod;
 
 	mutex_lock(&module_mutex);
 	mod = find_module("nvidia");
 	mutex_unlock(&module_mutex);
 
 	if(mod != NULL) {
 		TRACE("%s : Found NVIDIA module. Core Code: %p to %p\n", __FUNCTION__,
 			(void*)(mod->module_core),
 			(void*)(mod->module_core) + mod->core_size);
 
 		init_nv_device_reg();
 		nvidia_mod = mod; /* make module visible to others */
 		register_module_notifier(&nvidia_going);
 	}
 	else {
 		TRACE("%s : Could not find NVIDIA module! Loaded?\n", __FUNCTION__);
 		init_nv_device_reg();
 	}
 
 	kfree(work);
 }
 
 int init_nvidia_info(void)
 {
 	struct init_nvinfo_wq_data *wq_job =
 		kmalloc(sizeof(struct init_nvinfo_wq_data), GFP_ATOMIC);
 	INIT_WORK(&wq_job->work, __init_nvidia_info);
 	schedule_work(&wq_job->work);
 	return 0;
 }
 
 void shutdown_nvidia_info(void)
 {
 	if (nvidia_mod) {
 		nvidia_mod = NULL;
 		mb();
 
 		unregister_module_notifier(&nvidia_going);
 		shutdown_nv_device_reg();
 	}
 }
 
 /* works with pointers to static data inside the module too. */
 int is_nvidia_func(void* func_addr)
 {
 	int ret = 0;
 	struct module* mod = nvidia_mod;
 	if(mod)
 	{
 		ret = within_module_core((long unsigned int)func_addr, mod);
 		/*
 		if(ret)
 		{
 			TRACE("%s : %p is in NVIDIA module: %d\n",
 				__FUNCTION__, func_addr, ret);
 		}*/
 	}
 
 	return(ret);
 }
 
 u32 get_tasklet_nv_device_num(const struct tasklet_struct *t)
 {
 	// life is too short to use hard-coded offsets. update this later.
 	litmus_nv_state_t* nvstate = (litmus_nv_state_t*)(t->data);
 	litmus_nv_linux_state_t* linuxstate = container_of(nvstate, litmus_nv_linux_state_t, nv_state);
 
 	BUG_ON(linuxstate->device_num >= NV_DEVICE_NUM);
 
 	return(linuxstate->device_num);
 }
 
 u32 get_work_nv_device_num(const struct work_struct *t)
 {
 	// offset determined through observed behavior of the NV driver.
 	const int DEVICE_NUM_OFFSET = sizeof(struct work_struct);
 	void* state = (void*)(t);
 	void** device_num_ptr = state + DEVICE_NUM_OFFSET;
 	return(*((u32*)(*device_num_ptr)));
 }
 
 
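get_tasklet_nv_device_num() above uses the container_of walk directly; get_work_nv_device_num() instead relies on an empirically observed layout: immediately after the embedded work_struct sits a pointer through which device_num can be read. A toy stand-alone sketch of that double indirection (all type names hypothetical, and assuming no padding after the embedded work item):

#include <stdio.h>
#include <string.h>

struct toy_work { void *func; void *data; };      /* stand-in for struct work_struct */
struct toy_nv_work {
	struct toy_work work;
	void *owner;                              /* pointer stored right after the work item */
};
struct toy_state { unsigned int device_num; };

static unsigned int toy_get_work_device_num(const struct toy_work *w)
{
	/* mirror the kernel code: skip past sizeof(work_struct), read a
	 * pointer there, then read a u32 through that pointer */
	const int DEVICE_NUM_OFFSET = sizeof(struct toy_work);
	void *state = (void *)w;
	void **device_num_ptr = (void **)((char *)state + DEVICE_NUM_OFFSET);
	return *((unsigned int *)(*device_num_ptr));
}

int main(void)
{
	struct toy_state st = { .device_num = 2 };
	struct toy_nv_work nw;
	memset(&nw, 0, sizeof(nw));
	nw.owner = &st;   /* device_num is the first field of *owner */
	printf("device_num = %u\n", toy_get_work_device_num(&nw.work));   /* prints 2 */
	return 0;
}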
@@ -358,23 +425,23 @@ u32 get_work_nv_device_num(const struct work_struct *t)
 
 
 typedef struct {
 	raw_spinlock_t lock;
 	struct binheap owners;
 
 #ifdef CONFIG_LITMUS_SOFTIRQD
 	klmirqd_callback_t interrupt_callback;
 	struct task_struct* interrupt_thread;
 	unsigned int interrupt_ready:1; /* todo: make threads check for the ready flag */
 
 #ifdef CONFIG_LITMUS_NVIDIA_WORKQ_ON_DEDICATED
 	klmirqd_callback_t workq_callback;
 	struct task_struct* workq_thread;
 	unsigned int workq_ready:1;
 #endif
 #endif
 
 #ifdef CONFIG_LITMUS_NV_KLMIRQD_DEBUG
 	struct tasklet_struct nv_klmirqd_dbg_tasklet;
 #endif
 }nv_device_registry_t;
 
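The owners binheap in the registry above is a max-heap ordered by litmus->compare(), so the highest-priority task holding the GPU is always at the root; that root is what the GPU's klmirqd threads inherit from (see enable_gpu_owner()/disable_gpu_owner() below). A toy array-based max-heap sketching the invariant (not the kernel's binheap API):

#include <stdio.h>

#define MAX_OWNERS 8

struct owner_heap { int prio[MAX_OWNERS]; int n; };

static void heap_add(struct owner_heap *h, int prio)
{
	int i = h->n++;
	h->prio[i] = prio;
	while (i > 0 && h->prio[(i - 1) / 2] < h->prio[i]) {  /* sift up */
		int p = (i - 1) / 2, tmp = h->prio[p];
		h->prio[p] = h->prio[i];
		h->prio[i] = tmp;
		i = p;
	}
}

static int heap_top(const struct owner_heap *h) { return h->n ? h->prio[0] : -1; }

int main(void)
{
	struct owner_heap h = { .n = 0 };
	heap_add(&h, 10);
	heap_add(&h, 30);
	heap_add(&h, 20);
	/* the klmirqd thread for this GPU would inherit from the prio-30 owner */
	printf("inherit from prio %d\n", heap_top(&h));
	return 0;
}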
@@ -386,35 +453,35 @@ static nv_device_registry_t NV_DEVICE_REG[NV_DEVICE_NUM];
 #ifdef CONFIG_LITMUS_SOFTIRQD
 static int nvidia_launch_interrupt_cb(void *arg)
 {
 	unsigned long flags;
 	int reg_device_id = (int)(long long)(arg);
 	nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id];
 
 	TRACE("nvklmirqd callback for GPU %d\n", reg_device_id);
 
 	raw_spin_lock_irqsave(&reg->lock, flags);
 	reg->interrupt_thread = current;
 	reg->interrupt_ready = 1;
 	raw_spin_unlock_irqrestore(&reg->lock, flags);
 
 	return 0;
 }
 
 #ifdef CONFIG_LITMUS_NVIDIA_WORKQ_ON_DEDICATED
 static int nvidia_launch_workq_cb(void *arg)
 {
 	unsigned long flags;
 	int reg_device_id = (int)(long long)(arg);
 	nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id];
 
 	TRACE("nvklmworkerd callback for GPU %d\n", reg_device_id);
 
 	raw_spin_lock_irqsave(&reg->lock, flags);
 	reg->workq_thread = current;
 	reg->workq_ready = 1;
 	raw_spin_unlock_irqrestore(&reg->lock, flags);
 
 	return 0;
 }
 #endif
 #endif
@@ -422,133 +489,133 @@ static int nvidia_launch_workq_cb(void *arg)
 #ifdef CONFIG_LITMUS_NV_KLMIRQD_DEBUG
 struct nv_klmirqd_dbg_timer_struct
 {
 	struct hrtimer timer;
 };
 
 static struct nv_klmirqd_dbg_timer_struct nv_klmirqd_dbg_timer;
 
 static void nv_klmirqd_arm_dbg_timer(lt_t relative_time)
 {
 	lt_t when_to_fire = litmus_clock() + relative_time;
 
 	TRACE("next nv tasklet in %d ns\n", relative_time);
 
 	__hrtimer_start_range_ns(&nv_klmirqd_dbg_timer.timer,
 			ns_to_ktime(when_to_fire),
 			0,
 			HRTIMER_MODE_ABS_PINNED,
 			0);
 }
 
 static void nv_klmirqd_dbg_tasklet_func(unsigned long arg)
 {
 	lt_t now = litmus_clock();
 	nv_device_registry_t *reg = (nv_device_registry_t*)arg;
 	int gpunum = reg - &NV_DEVICE_REG[0];
 
 	TRACE("nv klmirqd routine invoked for GPU %d!\n", gpunum);
 
 	/* set up the next timer */
 	nv_klmirqd_arm_dbg_timer(now % (NSEC_PER_MSEC * 10)); // within the next 10ms.
 }
 
 
 static enum hrtimer_restart nvklmirqd_timer_func(struct hrtimer *timer)
 {
 	lt_t now = litmus_clock();
 	int gpu = (int)(now % num_online_gpus());
 	nv_device_registry_t *reg;
 
 	TRACE("nvklmirqd_timer invoked!\n");
 
 	reg = &NV_DEVICE_REG[gpu];
 
 	if (reg->interrupt_thread && reg->interrupt_ready) {
 		TRACE("Adding a tasklet for GPU %d\n", gpu);
 		litmus_tasklet_schedule(&reg->nv_klmirqd_dbg_tasklet, reg->interrupt_thread);
 	}
 	else {
 		TRACE("nv klmirqd is not ready!\n");
 		nv_klmirqd_arm_dbg_timer(now % (NSEC_PER_MSEC * 10)); // within the next 10ms.
 	}
 
 	return HRTIMER_NORESTART;
 }
 #endif
 
 
 static int gpu_owner_max_priority_order(struct binheap_node *a,
 				struct binheap_node *b)
 {
 	struct task_struct *d_a = container_of(binheap_entry(a, struct rt_param, gpu_owner_node),
 							struct task_struct, rt_param);
 	struct task_struct *d_b = container_of(binheap_entry(b, struct rt_param, gpu_owner_node),
 							struct task_struct, rt_param);
 
 	BUG_ON(!d_a);
 	BUG_ON(!d_b);
 
 	return litmus->compare(d_a, d_b);
 }
 
 static int init_nv_device_reg(void)
 {
 	int i;
 
 #ifdef CONFIG_LITMUS_SOFTIRQD
 	if (!klmirqd_is_ready()) {
 		TRACE("klmirqd is not ready!\n");
 		return 0;
 	}
 #endif
 
 	memset(NV_DEVICE_REG, 0, sizeof(NV_DEVICE_REG));
 	mb();
 
 	for(i = 0; i < num_online_gpus(); ++i) {
 		raw_spin_lock_init(&NV_DEVICE_REG[i].lock);
 		INIT_BINHEAP_HANDLE(&NV_DEVICE_REG[i].owners, gpu_owner_max_priority_order);
 
 #ifdef CONFIG_LITMUS_NV_KLMIRQD_DEBUG
 		tasklet_init(&NV_DEVICE_REG[i].nv_klmirqd_dbg_tasklet,
 				nv_klmirqd_dbg_tasklet_func, (unsigned long)&NV_DEVICE_REG[i]);
 #endif
 
 #ifdef CONFIG_LITMUS_SOFTIRQD
 		{
 			char name[MAX_KLMIRQD_NAME_LEN+1];
 			int default_cpu = litmus->map_gpu_to_cpu(i);
 
 			/* spawn the interrupt thread */
 			snprintf(name, MAX_KLMIRQD_NAME_LEN, "nvklmirqd%d", i);
 			NV_DEVICE_REG[i].interrupt_callback.func = nvidia_launch_interrupt_cb;
 			NV_DEVICE_REG[i].interrupt_callback.arg = (void*)(long long)(i);
 			mb();
 			if(launch_klmirqd_thread(name, default_cpu, &NV_DEVICE_REG[i].interrupt_callback) != 0) {
 				TRACE("Failed to create nvklmirqd thread for GPU %d\n", i);
 			}
 
 #ifdef CONFIG_LITMUS_NVIDIA_WORKQ_ON_DEDICATED
 			/* spawn the workqueue thread */
 			snprintf(name, MAX_KLMIRQD_NAME_LEN, "nvklmworker%d", i);
 			NV_DEVICE_REG[i].workq_callback.func = nvidia_launch_workq_cb;
 			NV_DEVICE_REG[i].workq_callback.arg = (void*)(long long)(i);
 			mb();
 			if(launch_klmirqd_thread(name, default_cpu, &NV_DEVICE_REG[i].workq_callback) != 0) {
 				TRACE("Failed to create nvklmworkqd thread for GPU %d\n", i);
 			}
 #endif
 		}
 #endif
 	}
 
 #ifdef CONFIG_LITMUS_NV_KLMIRQD_DEBUG
 	hrtimer_init(&nv_klmirqd_dbg_timer.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
 	nv_klmirqd_dbg_timer.timer.function = nvklmirqd_timer_func;
 	nv_klmirqd_arm_dbg_timer(NSEC_PER_MSEC * 1000);
 #endif
 
 	return(1);
 }
 
 
@@ -556,262 +623,262 @@ static int init_nv_device_reg(void)
 /* spawning of klmirqd threads can race with init_nv_device_reg()!!!! */
 static int shutdown_nv_device_reg(void)
 {
 	TRACE("Shutting down nv device registration.\n");
 
 #ifdef CONFIG_LITMUS_SOFTIRQD
 	{
 		unsigned long flags;
 		int i;
 		nv_device_registry_t *reg;
 
 		for (i = 0; i < num_online_gpus(); ++i) {
 
 			TRACE("Shutting down GPU %d.\n", i);
 
 			reg = &NV_DEVICE_REG[i];
 
 			if ((reg->interrupt_thread && reg->interrupt_ready)
 #ifdef CONFIG_LITMUS_NVIDIA_WORKQ_ON_DEDICATED
 				|| (reg->workq_thread && reg->workq_ready)
 #endif
 				)
 			{
 				raw_spin_lock_irqsave(&reg->lock, flags);
 				if (reg->interrupt_thread && reg->interrupt_ready) {
 					struct task_struct* th = reg->interrupt_thread;
 					reg->interrupt_thread = NULL;
 					mb();
 					reg->interrupt_ready = 0;
 					mb();
 					raw_spin_unlock_irqrestore(&reg->lock, flags);
 					kill_klmirqd_thread(th);
 				}
 				else
 					raw_spin_unlock_irqrestore(&reg->lock, flags);
 
 #ifdef CONFIG_LITMUS_NVIDIA_WORKQ_ON_DEDICATED
 				raw_spin_lock_irqsave(&reg->lock, flags);
 				if (reg->workq_thread && reg->workq_ready) {
 					struct task_struct* th = reg->workq_thread;
 					reg->workq_thread = NULL;
 					mb();
 					reg->workq_ready = 0;
 					mb();
 
 					raw_spin_unlock_irqrestore(&reg->lock, flags);
 					kill_klmirqd_thread(th);
 				}
 				else
 					raw_spin_unlock_irqrestore(&reg->lock, flags);
 #endif
 			}
 
 			while (!binheap_empty(&reg->owners)) {
 				binheap_delete_root(&reg->owners, struct rt_param, gpu_owner_node);
 			}
 		}
 	}
 #endif
 
 	return(1);
 }
 
 
 /* use to get the owner of nv_device_id. */
 struct task_struct* get_nv_max_device_owner(u32 target_device_id)
 {
 	struct task_struct *owner = NULL;
 	nv_device_registry_t *reg;
 
 	BUG_ON(target_device_id >= NV_DEVICE_NUM);
 
 	reg = &NV_DEVICE_REG[target_device_id];
 
 	if (!binheap_empty(&reg->owners)) {
 		struct task_struct *hp = container_of(binheap_top_entry(&reg->owners, struct rt_param, gpu_owner_node),
 							struct task_struct, rt_param);
 		TRACE_CUR("hp: %s/%d\n", hp->comm, hp->pid);
 
 		owner = hp;
 	}
 
 	return(owner);
 }
 
 
 #ifdef CONFIG_LITMUS_SOFTIRQD
 
 typedef enum {
 	INTERRUPT_TH,
 	WORKQ_TH
 } nvklmtype_t;
 
 static struct task_struct* __get_klm_thread(nv_device_registry_t* reg, nvklmtype_t type)
 {
 	struct task_struct *klmirqd = NULL;
 
 	switch(type)
 	{
 		case INTERRUPT_TH:
 #ifdef CONFIG_LITMUS_NVIDIA_WORKQ_ON
 		case WORKQ_TH:
 #endif
 			if(likely(reg->interrupt_ready))
 				klmirqd = reg->interrupt_thread;
 			break;
 #ifdef CONFIG_LITMUS_NVIDIA_WORKQ_ON_DEDICATED
 		case WORKQ_TH:
 			if(likely(reg->workq_ready))
 				klmirqd = reg->workq_thread;
 			break;
 #endif
 		default:
 			break;
 	}
 
 	return klmirqd;
 }
 
 static struct task_struct* __get_and_lock_klm_thread(nv_device_registry_t* reg, unsigned long* flags, nvklmtype_t type)
 {
 	struct task_struct *klmirqd;
 
 	raw_spin_lock_irqsave(&reg->lock, *flags);
 	klmirqd = __get_klm_thread(reg, type);
 
 	if (!klmirqd) {
 		/* unlock if thread does not exist or is not ready */
 		raw_spin_unlock_irqrestore(&reg->lock, *flags);
 	}
 
 	return klmirqd;
 }
 
 static void __unlock_klm_thread(nv_device_registry_t* reg, unsigned long* flags, nvklmtype_t type)
 {
 	/* workq and interrupts share a lock per GPU */
 	raw_spin_unlock_irqrestore(&reg->lock, *flags);
 }
 
 struct task_struct* get_and_lock_nvklmirqd_thread(u32 target_device_id, unsigned long* flags)
 {
 	nv_device_registry_t *reg;
 	struct task_struct *th;
 	BUG_ON(target_device_id >= NV_DEVICE_NUM);
 
 	if (unlikely(nvidia_mod == NULL))
 		return NULL;
 
 	reg = &NV_DEVICE_REG[target_device_id];
 	th = __get_and_lock_klm_thread(reg, flags, INTERRUPT_TH);
 
 	barrier();
 	if (unlikely(nvidia_mod == NULL)) {
 		th = NULL;
 		__unlock_klm_thread(reg, flags, INTERRUPT_TH);
 	}
 
 	return th;
 }
 
 void unlock_nvklmirqd_thread(u32 target_device_id, unsigned long* flags)
 {
 	nv_device_registry_t *reg;
 	BUG_ON(target_device_id >= NV_DEVICE_NUM);
 	reg = &NV_DEVICE_REG[target_device_id];
 	__unlock_klm_thread(reg, flags, INTERRUPT_TH);
 }
 
 struct task_struct* get_nvklmirqd_thread(u32 target_device_id)
 {
 	/* should this function be allowed? who will use klmirqd thread without thread safety? */
 	unsigned long flags;
 	struct task_struct *klmirqd;
 	klmirqd = get_and_lock_nvklmirqd_thread(target_device_id, &flags);
 	if(klmirqd)
 		unlock_nvklmirqd_thread(target_device_id, &flags);
 	return klmirqd;
 }
 
 #if defined(CONFIG_LITMUS_NVIDIA_WORKQ_ON) || defined(CONFIG_LITMUS_NVIDIA_WORKQ_ON_DEDICATED)
 
 struct task_struct* get_and_lock_nvklmworkqd_thread(u32 target_device_id, unsigned long* flags)
 {
 	nv_device_registry_t *reg;
 	struct task_struct *th;
 	BUG_ON(target_device_id >= NV_DEVICE_NUM);
 
 	if (unlikely(nvidia_mod == NULL))
 		return NULL;
 
 	reg = &NV_DEVICE_REG[target_device_id];
 	th = __get_and_lock_klm_thread(reg, flags, WORKQ_TH);
 
 	barrier();
 	if (unlikely(nvidia_mod == NULL)) {
 		th = NULL;
 		__unlock_klm_thread(reg, flags, WORKQ_TH);
 	}
 
 	return th;
 }
 
 void unlock_nvklmworkqd_thread(u32 target_device_id, unsigned long* flags)
 {
 	nv_device_registry_t *reg;
 	BUG_ON(target_device_id >= NV_DEVICE_NUM);
 	reg = &NV_DEVICE_REG[target_device_id];
 	__unlock_klm_thread(reg, flags, WORKQ_TH);
 }
 
 
 struct task_struct* get_nvklmworkqd_thread(u32 target_device_id)
 {
 	/* should this function be allowed? who will use klmirqd thread without thread safety? */
 	unsigned long flags;
 	struct task_struct *klmirqd;
 	klmirqd = get_and_lock_nvklmworkqd_thread(target_device_id, &flags);
 	if(klmirqd)
 		unlock_nvklmworkqd_thread(target_device_id, &flags);
 	return klmirqd;
 }
 #endif // end WORKQs
 
 
 static int gpu_klmirqd_increase_priority(struct task_struct *klmirqd, struct task_struct *hp)
 {
 	int retval = 0;
 
 	/*
 	TRACE_CUR("Increasing priority of %s/%d to %s/%d.\n",
 		klmirqd->comm, klmirqd->pid,
 		(hp) ? hp->comm : "null",
 		(hp) ? hp->pid : 0);
 	*/
 
 	/* the klmirqd thread should never attempt to hold a litmus-level real-time lock,
 	 * so nested support is not required */
 	retval = litmus->__increase_prio(klmirqd, hp);
 
 	return retval;
 }
 
 static int gpu_klmirqd_decrease_priority(struct task_struct *klmirqd, struct task_struct *hp, int budget_triggered)
 {
 	int retval = 0;
 
 	/*
 	TRACE_CUR("Decreasing priority of %s/%d to %s/%d.\n",
 		klmirqd->comm, klmirqd->pid,
 		(hp) ? hp->comm : "null",
 		(hp) ? hp->pid : 0);
 	*/
 
 	/* the klmirqd thread should never attempt to hold a litmus-level real-time lock,
 	 * so nested support is not required */
 	retval = litmus->__decrease_prio(klmirqd, hp, budget_triggered);
 
 	return retval;
 }
 #endif // end CONFIG_LITMUS_SOFTIRQD
 
@@ -821,159 +888,159 @@ static int gpu_klmirqd_decrease_priority(struct task_struct *klmirqd, struct tas
 /* call when a gpu owner becomes real-time */
 long enable_gpu_owner(struct task_struct *t)
 {
 	long retval = 0;
 	int gpu;
 	nv_device_registry_t *reg;
 
 #ifdef CONFIG_LITMUS_SOFTIRQD
 	struct task_struct *hp;
 #endif
 
 	if (!tsk_rt(t)->held_gpus) {
 //		TRACE_CUR("task %s/%d does not hold any GPUs\n", t->comm, t->pid);
 		return -1;
 	}
 
 	BUG_ON(!is_realtime(t));
 
 	gpu = find_first_bit(&tsk_rt(t)->held_gpus, BITS_PER_BYTE*sizeof(tsk_rt(t)->held_gpus));
 
 	if (binheap_is_in_heap(&tsk_rt(t)->gpu_owner_node)) {
 		TRACE_CUR("task %s/%d is already active on GPU %d\n", t->comm, t->pid, gpu);
 		goto out;
 	}
 
 	/* update the registration (and maybe klmirqd) */
 	reg = &NV_DEVICE_REG[gpu];
 
 	binheap_add(&tsk_rt(t)->gpu_owner_node, &reg->owners,
 			struct rt_param, gpu_owner_node);
 
 
 #ifdef CONFIG_LITMUS_SOFTIRQD
 	hp = container_of(binheap_top_entry(&reg->owners, struct rt_param, gpu_owner_node),
 			struct task_struct, rt_param);
 
 	if (hp == t) {
 		int interrupt_success;
 
 		/* we're the new hp */
 		/*
 		TRACE_CUR("%s/%d (eff_prio = %s/%d) is new hp on GPU %d.\n",
 			t->comm, t->pid,
 			effective_priority(t)->comm, effective_priority(t)->pid,
 			gpu);
 		*/
 
 		interrupt_success = gpu_klmirqd_increase_priority(reg->interrupt_thread, effective_priority(t));
 
 #ifdef CONFIG_LITMUS_NVIDIA_WORKQ_ON_DEDICATED
 		{
 			int workq_success = gpu_klmirqd_increase_priority(reg->workq_thread, effective_priority(t));
 			if(interrupt_success != 1 || workq_success != 1)
 				retval = (interrupt_success != 1) ? interrupt_success : workq_success;
 			else
 				retval = 1;
 		}
 #else
 		retval = interrupt_success;
 #endif
 	}
 #endif
 
 out:
 	return retval;
 }
 
 /* call when a gpu owner exits real-time */
 long disable_gpu_owner(struct task_struct *t)
 {
 	long retval = 0;
 	int gpu;
 	nv_device_registry_t *reg;
 
 #ifdef CONFIG_LITMUS_SOFTIRQD
 	struct task_struct *hp;
 	struct task_struct *new_hp = NULL;
 #endif
 
 	if (!tsk_rt(t)->held_gpus) {
 		TRACE_CUR("task %s/%d does not hold any GPUs\n", t->comm, t->pid);
 		return -1;
 	}
 
 	BUG_ON(!is_realtime(t));
 
 	gpu = find_first_bit(&tsk_rt(t)->held_gpus, BITS_PER_BYTE*sizeof(tsk_rt(t)->held_gpus));
 
 	if (!binheap_is_in_heap(&tsk_rt(t)->gpu_owner_node)) {
 //		TRACE_CUR("task %s/%d is not active on GPU %d\n", t->comm, t->pid, gpu);
 		goto out;
 	}
 
 //	TRACE_CUR("task %s/%d exiting from GPU %d.\n", t->comm, t->pid, gpu);
 
 
 	reg = &NV_DEVICE_REG[gpu];
 
 #ifdef CONFIG_LITMUS_SOFTIRQD
 	hp = container_of(binheap_top_entry(&reg->owners, struct rt_param, gpu_owner_node),
 			struct task_struct, rt_param);
 
 	binheap_delete(&tsk_rt(t)->gpu_owner_node, &reg->owners);
 
 
 	if (!binheap_empty(&reg->owners)) {
 		new_hp = container_of(binheap_top_entry(&reg->owners, struct rt_param, gpu_owner_node),
 				struct task_struct, rt_param);
 	}
 
 	if (hp == t && new_hp != t) {
 		int interrupt_success;
 #ifdef CONFIG_LITMUS_NVIDIA_WORKQ_ON_DEDICATED
 #endif
 		struct task_struct *to_inh = (new_hp) ? effective_priority(new_hp) : NULL;
 
 		/*
 		TRACE_CUR("%s/%d is no longer hp on GPU %d; new hp = %s/%d (eff_prio = %s/%d).\n",
 			t->comm, t->pid,
 			gpu,
 			(new_hp) ? new_hp->comm : "null",
 			(new_hp) ? new_hp->pid : 0,
 			(to_inh) ? to_inh->comm : "null",
 			(to_inh) ? to_inh->pid : 0);
 		*/
 
 		interrupt_success = gpu_klmirqd_decrease_priority(reg->interrupt_thread, to_inh, 0);
 
 #ifdef CONFIG_LITMUS_NVIDIA_WORKQ_ON_DEDICATED
 		{
 			int workq_success = gpu_klmirqd_decrease_priority(reg->workq_thread, to_inh, 0);
 			if(interrupt_success != 1 || workq_success != 1)
 				retval = (interrupt_success != 1) ? interrupt_success : workq_success;
 			else
 				retval = 1;
 		}
 #else
 		retval = interrupt_success;
 #endif
 	}
 #else
 	binheap_delete(&tsk_rt(t)->gpu_owner_node, &reg->owners);
 #endif
 
 out:
 	return retval;
 }
 
 
 long recheck_gpu_owner(struct task_struct* t)
 {
 	/* TODO: blend implementation of disable/enable */
 	int retval = disable_gpu_owner(t);
 	if (!retval)
 		retval = enable_gpu_owner(t);
 	return retval;
 }
 
 
@@ -984,177 +1051,177 @@ long recheck_gpu_owner(struct task_struct* t)
int gpu_owner_increase_priority(struct task_struct *t)
{
	int retval = 0;
	int gpu;
	nv_device_registry_t *reg;

	struct task_struct *hp = NULL;
	struct task_struct *hp_eff = NULL;

#ifdef CONFIG_LITMUS_SOFTIRQD
	int increase_klmirqd = 0;
#endif

	BUG_ON(!is_realtime(t));
	BUG_ON(!tsk_rt(t)->held_gpus);

	gpu = find_first_bit(&tsk_rt(t)->held_gpus, BITS_PER_BYTE*sizeof(tsk_rt(t)->held_gpus));

	if (!binheap_is_in_heap(&tsk_rt(t)->gpu_owner_node)) {
		TRACE_CUR("nv klmirqd may not inherit from %s/%d on GPU %d\n",
				t->comm, t->pid, gpu);
		goto out;
	}

	TRACE_CUR("task %s/%d on GPU %d increasing priority.\n", t->comm, t->pid, gpu);
	reg = &NV_DEVICE_REG[gpu];

	hp = container_of(binheap_top_entry(&reg->owners, struct rt_param, gpu_owner_node),
			struct task_struct, rt_param);
	hp_eff = effective_priority(hp);

	if (hp != t) { /* our position in the heap may have changed. hp is already at the root. */
		binheap_decrease(&tsk_rt(t)->gpu_owner_node, &reg->owners);
	}
#ifdef CONFIG_LITMUS_SOFTIRQD
	else {
		/* unconditionally propagate - t already has the updated eff and is at the root,
		   so we can't detect a change in inheritance, but we know that priority has
		   indeed increased/changed. */
		increase_klmirqd = 1;
	}

	hp = container_of(binheap_top_entry(&reg->owners, struct rt_param, gpu_owner_node),
			struct task_struct, rt_param);

	/* check if the eff. prio. of hp has changed */
	if (increase_klmirqd || (effective_priority(hp) != hp_eff)) {
		int interrupt_success;

		hp_eff = effective_priority(hp);
		TRACE_CUR("%s/%d (eff_prio = %s/%d) is new hp on GPU %d.\n",
				hp->comm, hp->pid,
				hp_eff->comm, hp_eff->pid,
				gpu);

		interrupt_success = gpu_klmirqd_increase_priority(reg->interrupt_thread, hp_eff);

#ifdef CONFIG_LITMUS_NVIDIA_WORKQ_ON_DEDICATED
		{
			int workq_success = gpu_klmirqd_increase_priority(reg->workq_thread, hp_eff);
			if (interrupt_success != 1 || workq_success != 1)
				retval = (interrupt_success != 1) ? interrupt_success : workq_success;
			else
				retval = 1;
		}
#else
		retval = interrupt_success;
#endif

	}
#endif

out:
	return retval;
}

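/* Counterpart to gpu_owner_increase_priority(): re-anchor t in the
 * per-GPU owners heap after its priority has decreased and, if t was
 * the top-priority owner, pass the new top owner's effective priority
 * down to the GPU's klmirqd threads. */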
int gpu_owner_decrease_priority(struct task_struct *t)
{
	int retval = 0;
	int gpu;
	nv_device_registry_t *reg;

	struct task_struct *hp = NULL;
	struct task_struct *hp_eff = NULL;

	BUG_ON(!is_realtime(t));
	BUG_ON(!tsk_rt(t)->held_gpus);

	gpu = find_first_bit(&tsk_rt(t)->held_gpus, BITS_PER_BYTE*sizeof(tsk_rt(t)->held_gpus));

	if (!binheap_is_in_heap(&tsk_rt(t)->gpu_owner_node)) {
		TRACE_CUR("nv klmirqd may not inherit from %s/%d on GPU %d\n",
				t->comm, t->pid, gpu);
		goto out;
	}

	TRACE_CUR("task %s/%d on GPU %d decreasing priority.\n", t->comm, t->pid, gpu);
	reg = &NV_DEVICE_REG[gpu];

	hp = container_of(binheap_top_entry(&reg->owners, struct rt_param, gpu_owner_node),
			struct task_struct, rt_param);
	hp_eff = effective_priority(hp);
	binheap_delete(&tsk_rt(t)->gpu_owner_node, &reg->owners);
	binheap_add(&tsk_rt(t)->gpu_owner_node, &reg->owners,
			struct rt_param, gpu_owner_node);

#ifdef CONFIG_LITMUS_SOFTIRQD
	if (hp == t) { /* t was originally the hp */
		struct task_struct *new_hp =
			container_of(binheap_top_entry(&reg->owners, struct rt_param, gpu_owner_node),
					struct task_struct, rt_param);
		/* if the new_hp is still t, or if the effective priority has changed */
		if ((new_hp == t) || (effective_priority(new_hp) != hp_eff)) {
			int interrupt_success;

			hp_eff = effective_priority(new_hp);
			TRACE_CUR("%s/%d is no longer hp on GPU %d.\n", t->comm, t->pid, gpu);
			interrupt_success = gpu_klmirqd_decrease_priority(reg->interrupt_thread, hp_eff, 1);

#ifdef CONFIG_LITMUS_NVIDIA_WORKQ_ON_DEDICATED
			{
				int workq_success = gpu_klmirqd_decrease_priority(reg->workq_thread, hp_eff, 1);
				if (interrupt_success != 1 || workq_success != 1)
					retval = (interrupt_success != 1) ? interrupt_success : workq_success;
				else
					retval = 1;
			}
#else
			retval = interrupt_success;
#endif
		}
	}
#endif

out:
	return retval;
}

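/* Registration is now plain per-task bookkeeping: each task records
 * the GPUs it holds in the tsk_rt(t)->held_gpus bitmask. */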
static int __reg_nv_device(int reg_device_id, struct task_struct *t)
{
	__set_bit(reg_device_id, &tsk_rt(t)->held_gpus);

	return(0);
}

static int __clear_reg_nv_device(int de_reg_device_id, struct task_struct *t)
{
	__clear_bit(de_reg_device_id, &tsk_rt(t)->held_gpus);

	return(0);
}

int reg_nv_device(int reg_device_id, int reg_action, struct task_struct *t)
{
	int ret;

	if ((reg_device_id < num_online_gpus()) && (reg_device_id >= 0))
	{
		if (reg_action)
			ret = __reg_nv_device(reg_device_id, t);
		else
			ret = __clear_reg_nv_device(reg_device_id, t);
	}
	else
	{
		ret = -ENODEV;
	}

	return(ret);
}

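/* Sketch of a hypothetical caller (the call sites below are
 * illustrative only; in the kernel the registering task would
 * typically be 'current'):
 *
 *	reg_nv_device(0, 1, current);	// task registers itself for GPU 0
 *	...				// submit GPU work
 *	reg_nv_device(0, 0, current);	// task deregisters when done
 *
 * Any reg_device_id outside [0, num_online_gpus()) returns -ENODEV. */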
@@ -1162,45 +1229,45 @@ int reg_nv_device(int reg_device_id, int reg_action, struct task_struct *t)
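/* Old PAI-based tracking of each registry's max_prio_owner, left
 * commented out for reference: */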
#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
//void pai_check_priority_increase(struct task_struct *t, int reg_device_id)
//{
//	unsigned long flags;
//	nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id];
//
//
//
//	if(reg->max_prio_owner != t) {
//
//		raw_spin_lock_irqsave(&reg->lock, flags);
//
//		if(reg->max_prio_owner != t) {
//			if(litmus->compare(t, reg->max_prio_owner)) {
//				litmus->change_prio_pai_tasklet(reg->max_prio_owner, t);
//				reg->max_prio_owner = t;
//			}
//		}
//
//		raw_spin_unlock_irqrestore(&reg->lock, flags);
//	}
//}
//
//
//void pai_check_priority_decrease(struct task_struct *t, int reg_device_id)
//{
//	unsigned long flags;
//	nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id];
//
//	if(reg->max_prio_owner == t) {
//
//		raw_spin_lock_irqsave(&reg->lock, flags);
//
//		if(reg->max_prio_owner == t) {
//			reg->max_prio_owner = find_hp_owner(reg, NULL);
//			if(reg->max_prio_owner != t) {
//				litmus->change_prio_pai_tasklet(t, reg->max_prio_owner);
//			}
//		}
//
//		raw_spin_unlock_irqrestore(&reg->lock, flags);
//	}
//}
#endif

@@ -1210,144 +1277,144 @@ int reg_nv_device(int reg_device_id, int reg_action, struct task_struct *t)
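/* The remainder of the file preserves, commented out, the old
 * array-based device registry: a fixed owners[] table with
 * max_prio_owner tracking and the registry lock/unlock helpers. */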
//static int __reg_nv_device(int reg_device_id, struct task_struct *t)
//{
//	int ret = 0;
//	int i;
//	struct task_struct *old_max = NULL;
//
//
//	raw_spin_lock_irqsave(&reg->lock, flags);
//
//	if(reg->nr_owners < NV_MAX_SIMULT_USERS) {
//		TRACE_TASK(t, "registers GPU %d\n", reg_device_id);
//		for(i = 0; i < NV_MAX_SIMULT_USERS; ++i) {
//			if(reg->owners[i] == NULL) {
//				reg->owners[i] = t;
//
//				//if(edf_higher_prio(t, reg->max_prio_owner)) {
//				if(litmus->compare(t, reg->max_prio_owner)) {
//					old_max = reg->max_prio_owner;
//					reg->max_prio_owner = t;
//
//#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
//					litmus->change_prio_pai_tasklet(old_max, t);
//#endif
//				}
//
//#ifdef CONFIG_LITMUS_SOFTIRQD
//				down_and_set_stat(t, HELD, &tsk_rt(t)->klmirqd_sem);
//#endif
//				++(reg->nr_owners);
//
//				break;
//			}
//		}
//	}
//	else
//	{
//		TRACE_CUR("%s: device %d is already in use!\n", __FUNCTION__, reg_device_id);
//		//ret = -EBUSY;
//	}
//
//	raw_spin_unlock_irqrestore(&reg->lock, flags);
//
//	__set_bit(reg_device_id, &tsk_rt(t)->held_gpus);
//
//	return(ret);
//}
//
//static int __clear_reg_nv_device(int de_reg_device_id, struct task_struct *t)
//{
//	int ret = 0;
//	int i;
//	unsigned long flags;
//	nv_device_registry_t *reg = &NV_DEVICE_REG[de_reg_device_id];
//
//#ifdef CONFIG_LITMUS_SOFTIRQD
//	struct task_struct* klmirqd_th = get_klmirqd(de_reg_device_id);
//#endif
//
//	if(!test_bit(de_reg_device_id, &tsk_rt(t)->held_gpus)) {
//		return ret;
//	}
//
//	raw_spin_lock_irqsave(&reg->lock, flags);
//
//	TRACE_TASK(t, "unregisters GPU %d\n", de_reg_device_id);
//
//	for(i = 0; i < NV_MAX_SIMULT_USERS; ++i) {
//		if(reg->owners[i] == t) {
//#ifdef CONFIG_LITMUS_SOFTIRQD
//			flush_pending(klmirqd_th, t);
//#endif
//			if(reg->max_prio_owner == t) {
//				reg->max_prio_owner = find_hp_owner(reg, t);
//#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
//				litmus->change_prio_pai_tasklet(t, reg->max_prio_owner);
//#endif
//			}
//
//#ifdef CONFIG_LITMUS_SOFTIRQD
//			up_and_set_stat(t, NOT_HELD, &tsk_rt(t)->klmirqd_sem);
//#endif
//
//			reg->owners[i] = NULL;
//			--(reg->nr_owners);
//
//			break;
//		}
//	}
//
//	raw_spin_unlock_irqrestore(&reg->lock, flags);
//
//	__clear_bit(de_reg_device_id, &tsk_rt(t)->held_gpus);
//
//	return(ret);
//}
//
//
//int reg_nv_device(int reg_device_id, int reg_action, struct task_struct *t)
//{
//	int ret;
//
//	if((reg_device_id < NV_DEVICE_NUM) && (reg_device_id >= 0))
//	{
//		if(reg_action)
//			ret = __reg_nv_device(reg_device_id, t);
//		else
//			ret = __clear_reg_nv_device(reg_device_id, t);
//	}
//	else
//	{
//		ret = -ENODEV;
//	}
//
//	return(ret);
//}



//void lock_nv_registry(u32 target_device_id, unsigned long* flags)
//{
//	BUG_ON(target_device_id >= NV_DEVICE_NUM);
//
//	if(in_interrupt())
//		TRACE("Locking registry for %d.\n", target_device_id);
//	else
//		TRACE_CUR("Locking registry for %d.\n", target_device_id);
//
//	raw_spin_lock_irqsave(&NV_DEVICE_REG[target_device_id].lock, *flags);
//}
//
//void unlock_nv_registry(u32 target_device_id, unsigned long* flags)
//{
//	BUG_ON(target_device_id >= NV_DEVICE_NUM);
//
//	if(in_interrupt())
//		TRACE("Unlocking registry for %d.\n", target_device_id);
//	else
//		TRACE_CUR("Unlocking registry for %d.\n", target_device_id);
//
//	raw_spin_unlock_irqrestore(&NV_DEVICE_REG[target_device_id].lock, *flags);
//}
