aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGlenn Elliott <gelliott@cs.unc.edu>2012-12-13 17:15:17 -0500
committerGlenn Elliott <gelliott@cs.unc.edu>2012-12-13 17:15:17 -0500
commit8f4bc19471bd49f4dcf6ab20254b7c71ec12e4e2 (patch)
tree465a8f5ec2fd6b76f43dc791ef4f78cae226386a
parentbb9b9d2075a717ea77cb83c30d55aed366bececf (diff)
Fix several klmirqd bugs.
1) Deadlock in litmus_exit_task() -- added litmus_pre_exit_task(), to be called without the Linux runqueue lock held. 2) Prioritization of base-prio klmirqd/aux threads vs. normal real-time tasks. 3) Initialization of gpu owner binheap node moved to *after* memset(0) of rt_params. 4) Exit path of klmirqd threads.
-rw-r--r--include/litmus/litmus.h2
-rw-r--r--kernel/sched.c3
-rw-r--r--litmus/Kconfig8
-rw-r--r--litmus/edf_common.c20
-rw-r--r--litmus/litmus.c36
-rw-r--r--litmus/litmus_softirq.c4
-rw-r--r--litmus/nvidia_info.c87
-rw-r--r--litmus/sched_cedf.c6
8 files changed, 138 insertions, 28 deletions
diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h
index 711b88e2b3d1..54f33e835682 100644
--- a/include/litmus/litmus.h
+++ b/include/litmus/litmus.h
@@ -38,6 +38,8 @@ void litmus_exec(void);
38void exit_litmus(struct task_struct *dead_tsk); 38void exit_litmus(struct task_struct *dead_tsk);
39 39
40long litmus_admit_task(struct task_struct *tsk); 40long litmus_admit_task(struct task_struct *tsk);
41
42void litmus_pre_exit_task(struct task_struct *tsk); // called before litmus_exit_task, but without run queue locks held
41void litmus_exit_task(struct task_struct *tsk); 43void litmus_exit_task(struct task_struct *tsk);
42 44
43#define is_realtime(t) ((t)->policy == SCHED_LITMUS) 45#define is_realtime(t) ((t)->policy == SCHED_LITMUS)
diff --git a/kernel/sched.c b/kernel/sched.c
index 840f87bce097..a1f10984adb3 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5288,6 +5288,9 @@ recheck:
5288 if (retval) 5288 if (retval)
5289 return retval; 5289 return retval;
5290 } 5290 }
5291 else if (p->policy == SCHED_LITMUS) {
5292 litmus_pre_exit_task(p);
5293 }
5291 5294
5292 /* 5295 /*
5293 * make sure no PI-waiters arrive (or leave) while we are 5296 * make sure no PI-waiters arrive (or leave) while we are
diff --git a/litmus/Kconfig b/litmus/Kconfig
index 8ca66b4d687c..b704e893e9be 100644
--- a/litmus/Kconfig
+++ b/litmus/Kconfig
@@ -470,6 +470,14 @@ config CUDA_3_2
470 470
471endchoice 471endchoice
472 472
473config LITMUS_NV_KLMIRQD_DEBUG
474 bool "Raise fake sporadic tasklets to test nv klmirqd threads."
475 depends on LITMUS_NVIDIA && LITMUS_SOFTIRQD
476 default n
477 help
478 Causes tasklets to be sporadically dispatched to waiting klmirqd
479 threads.
480
473endmenu 481endmenu
474 482
475endmenu 483endmenu
diff --git a/litmus/edf_common.c b/litmus/edf_common.c
index 27b728a55669..255e4f36e413 100644
--- a/litmus/edf_common.c
+++ b/litmus/edf_common.c
@@ -119,8 +119,15 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second)
119 goto aux_tie_break; 119 goto aux_tie_break;
120 } 120 }
121 else { 121 else {
122
122 // make the aux thread lowest priority real-time task 123 // make the aux thread lowest priority real-time task
123 int temp = (first_lo_aux) ? !is_realtime(second) : !is_realtime(first); 124 int temp = 0;
125 if (first_lo_aux && is_realtime(second)) {
126// temp = 0;
127 }
128 else if(second_lo_aux && is_realtime(first)) {
129 temp = 1;
130 }
124 TRACE_CUR("%s/%d >> %s/%d --- %d\n", first->comm, first->pid, second->comm, second->pid, temp); 131 TRACE_CUR("%s/%d >> %s/%d --- %d\n", first->comm, first->pid, second->comm, second->pid, temp);
125 return temp; 132 return temp;
126 } 133 }
@@ -149,8 +156,15 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second)
149 goto klmirqd_tie_break; 156 goto klmirqd_tie_break;
150 } 157 }
151 else { 158 else {
152 // make the klmirqd thread (second) lowest priority real-time task 159 // make the klmirqd thread the lowest-priority real-time task
153 int temp = (first_lo_klmirqd) ? !is_realtime(second) : !is_realtime(first); 160 // (but above low-prio aux tasks and Linux tasks)
161 int temp = 0;
162 if (first_lo_klmirqd && is_realtime(second)) {
163// temp = 0;
164 }
165 else if(second_lo_klmirqd && is_realtime(first)) {
166 temp = 1;
167 }
154 TRACE_CUR("%s/%d >> %s/%d --- %d\n", first->comm, first->pid, second->comm, second->pid, temp); 168 TRACE_CUR("%s/%d >> %s/%d --- %d\n", first->comm, first->pid, second->comm, second->pid, temp);
155 return temp; 169 return temp;
156 } 170 }
diff --git a/litmus/litmus.c b/litmus/litmus.c
index f98aa9d778a2..1aada57176de 100644
--- a/litmus/litmus.c
+++ b/litmus/litmus.c
@@ -355,8 +355,8 @@ static void reinit_litmus_state(struct task_struct* p, int restore)
355 ctrl_page = p->rt_param.ctrl_page; 355 ctrl_page = p->rt_param.ctrl_page;
356 } 356 }
357 357
358#ifdef CONFIG_LITMUS_NESTED_LOCKING 358#ifdef CONFIG_LITMUS_NVIDIA
359 prio_order = p->rt_param.hp_blocked_tasks.compare; 359 WARN_ON(p->rt_param.held_gpus != 0);
360#endif 360#endif
361 361
362#ifdef CONFIG_LITMUS_LOCKING 362#ifdef CONFIG_LITMUS_LOCKING
@@ -367,15 +367,7 @@ static void reinit_litmus_state(struct task_struct* p, int restore)
367#endif 367#endif
368 368
369#ifdef CONFIG_LITMUS_NESTED_LOCKING 369#ifdef CONFIG_LITMUS_NESTED_LOCKING
370// WARN_ON(p->rt_param.blocked_lock); 370 prio_order = p->rt_param.hp_blocked_tasks.compare;
371// WARN_ON(!binheap_empty(&p->rt_param.hp_blocked_tasks));
372#endif
373
374
375#ifdef CONFIG_LITMUS_NVIDIA
376 WARN_ON(p->rt_param.held_gpus != 0);
377
378 INIT_BINHEAP_NODE(&p->rt_param.gpu_owner_node);
379#endif 371#endif
380 372
381 /* Cleanup everything else. */ 373 /* Cleanup everything else. */
@@ -384,8 +376,9 @@ static void reinit_litmus_state(struct task_struct* p, int restore)
384#ifdef CONFIG_REALTIME_AUX_TASKS 376#ifdef CONFIG_REALTIME_AUX_TASKS
385 /* also clear out the aux_data. the !restore case is only called on 377 /* also clear out the aux_data. the !restore case is only called on
386 * fork (initial thread creation). */ 378 * fork (initial thread creation). */
387 if (!restore) 379 if (!restore) {
388 memset(&p->aux_data, 0, sizeof(p->aux_data)); 380 memset(&p->aux_data, 0, sizeof(p->aux_data));
381 }
389#endif 382#endif
390 383
391 /* Restore preserved fields. */ 384 /* Restore preserved fields. */
@@ -394,6 +387,10 @@ static void reinit_litmus_state(struct task_struct* p, int restore)
394 p->rt_param.ctrl_page = ctrl_page; 387 p->rt_param.ctrl_page = ctrl_page;
395 } 388 }
396 389
390#ifdef CONFIG_LITMUS_NVIDIA
391 INIT_BINHEAP_NODE(&p->rt_param.gpu_owner_node);
392#endif
393
397#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING) 394#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING)
398 init_gpu_affinity_state(p); 395 init_gpu_affinity_state(p);
399#endif 396#endif
@@ -494,15 +491,20 @@ out:
494 return retval; 491 return retval;
495} 492}
496 493
497void litmus_exit_task(struct task_struct* tsk) 494void litmus_pre_exit_task(struct task_struct* tsk)
498{ 495{
499 if (is_realtime(tsk)) { 496 if (is_realtime(tsk)) {
500 sched_trace_task_completion(tsk, 1);
501
502 if (tsk_rt(tsk)->rsrc_exit_cb) { 497 if (tsk_rt(tsk)->rsrc_exit_cb) {
503 int ret = tsk_rt(tsk)->rsrc_exit_cb(tsk); 498 int ret = tsk_rt(tsk)->rsrc_exit_cb(tsk);
504 WARN_ON(ret != 0); 499 WARN_ON(ret != 0);
505 } 500 }
501 }
502}
503
504void litmus_exit_task(struct task_struct* tsk)
505{
506 if (is_realtime(tsk)) {
507 sched_trace_task_completion(tsk, 1);
506 508
507 litmus->task_exit(tsk); 509 litmus->task_exit(tsk);
508 510
@@ -637,8 +639,10 @@ void exit_litmus(struct task_struct *dead_tsk)
637 } 639 }
638 640
639 /* main cleanup only for RT tasks */ 641 /* main cleanup only for RT tasks */
640 if (is_realtime(dead_tsk)) 642 if (is_realtime(dead_tsk)) {
643 litmus_pre_exit_task(dead_tsk); /* todo: double check that no Linux rq lock is held */
641 litmus_exit_task(dead_tsk); 644 litmus_exit_task(dead_tsk);
645 }
642} 646}
643 647
644 648
diff --git a/litmus/litmus_softirq.c b/litmus/litmus_softirq.c
index 9c5ecab5e8d9..be06405021c5 100644
--- a/litmus/litmus_softirq.c
+++ b/litmus/litmus_softirq.c
@@ -1163,3 +1163,7 @@ int __litmus_schedule_work(struct work_struct *w, struct task_struct* klmirqd_th
1163} 1163}
1164EXPORT_SYMBOL(__litmus_schedule_work); 1164EXPORT_SYMBOL(__litmus_schedule_work);
1165 1165
1166
1167
1168
1169
diff --git a/litmus/nvidia_info.c b/litmus/nvidia_info.c
index 3d38b168d9ba..059a7e7ac715 100644
--- a/litmus/nvidia_info.c
+++ b/litmus/nvidia_info.c
@@ -258,6 +258,8 @@ void dump_nvidia_info(const struct tasklet_struct *t)
258static struct module* nvidia_mod = NULL; 258static struct module* nvidia_mod = NULL;
259 259
260 260
261
262
261#if 0 263#if 0
262static int nvidia_ready_module_notify(struct notifier_block *self, 264static int nvidia_ready_module_notify(struct notifier_block *self,
263 unsigned long val, void *data) 265 unsigned long val, void *data)
@@ -390,6 +392,10 @@ typedef struct {
390 struct task_struct* thread; 392 struct task_struct* thread;
391 int ready:1; /* todo: make threads check for the ready flag */ 393 int ready:1; /* todo: make threads check for the ready flag */
392#endif 394#endif
395
396#ifdef CONFIG_LITMUS_NV_KLMIRQD_DEBUG
397 struct tasklet_struct nv_klmirqd_dbg_tasklet;
398#endif
393}nv_device_registry_t; 399}nv_device_registry_t;
394 400
395 401
@@ -397,8 +403,6 @@ static nv_device_registry_t NV_DEVICE_REG[NV_DEVICE_NUM];
397 403
398 404
399 405
400
401
402#ifdef CONFIG_LITMUS_SOFTIRQD 406#ifdef CONFIG_LITMUS_SOFTIRQD
403static int nvidia_klmirqd_cb(void *arg) 407static int nvidia_klmirqd_cb(void *arg)
404{ 408{
@@ -417,6 +421,63 @@ static int nvidia_klmirqd_cb(void *arg)
417} 421}
418#endif 422#endif
419 423
424#ifdef CONFIG_LITMUS_NV_KLMIRQD_DEBUG
425struct nv_klmirqd_dbg_timer_struct
426{
427 struct hrtimer timer;
428};
429
430static struct nv_klmirqd_dbg_timer_struct nv_klmirqd_dbg_timer;
431
432static void nv_klmirqd_arm_dbg_timer(lt_t relative_time)
433{
434 lt_t when_to_fire = litmus_clock() + relative_time;
435
436 TRACE("next nv tasklet in %d ns\n", relative_time);
437
438 __hrtimer_start_range_ns(&nv_klmirqd_dbg_timer.timer,
439 ns_to_ktime(when_to_fire),
440 0,
441 HRTIMER_MODE_ABS_PINNED,
442 0);
443}
444
445static void nv_klmirqd_dbg_tasklet_func(unsigned long arg)
446{
447 lt_t now = litmus_clock();
448 nv_device_registry_t *reg = (nv_device_registry_t*)arg;
449 int gpunum = reg - &NV_DEVICE_REG[0];
450
451 TRACE("nv klmirqd routine invoked for GPU %d!\n", gpunum);
452
453 /* set up the next timer */
454 nv_klmirqd_arm_dbg_timer(now % (NSEC_PER_MSEC * 10)); // within the next 10ms.
455}
456
457
458static enum hrtimer_restart nvklmirqd_timer_func(struct hrtimer *timer)
459{
460 lt_t now = litmus_clock();
461 int gpu = (int)(now % num_online_gpus());
462 nv_device_registry_t *reg;
463
464 TRACE("nvklmirqd_timer invoked!\n");
465
466 reg = &NV_DEVICE_REG[gpu];
467
468 if (reg->thread && reg->ready) {
469 TRACE("Adding a tasklet for GPU %d\n", gpu);
470 litmus_tasklet_schedule(&reg->nv_klmirqd_dbg_tasklet, reg->thread);
471 }
472 else {
473 TRACE("nv klmirqd is not ready!\n");
474 nv_klmirqd_arm_dbg_timer(now % (NSEC_PER_MSEC * 10)); // within the next 10ms.
475 }
476
477 return HRTIMER_NORESTART;
478}
479#endif
480
420 481
421static int gpu_owner_max_priority_order(struct binheap_node *a, 482static int gpu_owner_max_priority_order(struct binheap_node *a,
422 struct binheap_node *b) 483 struct binheap_node *b)
@@ -451,6 +512,10 @@ static int init_nv_device_reg(void)
451 raw_spin_lock_init(&NV_DEVICE_REG[i].lock); 512 raw_spin_lock_init(&NV_DEVICE_REG[i].lock);
452 INIT_BINHEAP_HANDLE(&NV_DEVICE_REG[i].owners, gpu_owner_max_priority_order); 513 INIT_BINHEAP_HANDLE(&NV_DEVICE_REG[i].owners, gpu_owner_max_priority_order);
453 514
515#ifdef CONFIG_LITMUS_NV_KLMIRQD_DEBUG
516 tasklet_init(&NV_DEVICE_REG[i].nv_klmirqd_dbg_tasklet, nv_klmirqd_dbg_tasklet_func, (unsigned long)&NV_DEVICE_REG[i]);
517#endif
518
454#ifdef CONFIG_LITMUS_SOFTIRQD 519#ifdef CONFIG_LITMUS_SOFTIRQD
455 { 520 {
456 int default_cpu = litmus->map_gpu_to_cpu(i); 521 int default_cpu = litmus->map_gpu_to_cpu(i);
@@ -466,6 +531,12 @@ static int init_nv_device_reg(void)
466#endif 531#endif
467 } 532 }
468 533
534#ifdef CONFIG_LITMUS_NV_KLMIRQD_DEBUG
535 hrtimer_init(&nv_klmirqd_dbg_timer.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
536 nv_klmirqd_dbg_timer.timer.function = nvklmirqd_timer_func;
537 nv_klmirqd_arm_dbg_timer(NSEC_PER_MSEC * 1000);
538#endif
539
469 return(1); 540 return(1);
470} 541}
471 542
@@ -578,7 +649,7 @@ static int gpu_klmirqd_decrease_priority(struct task_struct *klmirqd, struct tas
578 649
579 650
580 651
581/* call when an aux_owner becomes real-time */ 652/* call when a gpu owner becomes real-time */
582long enable_gpu_owner(struct task_struct *t) 653long enable_gpu_owner(struct task_struct *t)
583{ 654{
584 long retval = 0; 655 long retval = 0;
@@ -631,7 +702,7 @@ out:
631 return retval; 702 return retval;
632} 703}
633 704
634/* call when an aux_owner exits real-time */ 705/* call when a gpu owner exits real-time */
635long disable_gpu_owner(struct task_struct *t) 706long disable_gpu_owner(struct task_struct *t)
636{ 707{
637 long retval = 0; 708 long retval = 0;
@@ -773,9 +844,9 @@ int gpu_owner_decrease_priority(struct task_struct *t)
773 844
774 gpu = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus)); 845 gpu = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus));
775 846
776 if (!binheap_is_in_heap(&tsk_rt(t)->aux_task_owner_node)) { 847 if (!binheap_is_in_heap(&tsk_rt(t)->gpu_owner_node)) {
777 WARN_ON(!is_running(t)); 848 WARN_ON(!is_running(t));
778 TRACE_CUR("aux tasks may not inherit from %s/%d on GPU %d\n", 849 TRACE_CUR("nv klmirqd may not inherit from %s/%d on GPU %d\n",
779 t->comm, t->pid, gpu); 850 t->comm, t->pid, gpu);
780 goto out; 851 goto out;
781 } 852 }
@@ -865,6 +936,10 @@ int reg_nv_device(int reg_device_id, int reg_action, struct task_struct *t)
865 936
866 937
867 938
939
940
941
942
868#ifdef CONFIG_LITMUS_PAI_SOFTIRQD 943#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
869//void pai_check_priority_increase(struct task_struct *t, int reg_device_id) 944//void pai_check_priority_increase(struct task_struct *t, int reg_device_id)
870//{ 945//{
diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c
index 46de8041cf59..a454832b2aa8 100644
--- a/litmus/sched_cedf.c
+++ b/litmus/sched_cedf.c
@@ -1717,17 +1717,17 @@ static long cedf_activate_plugin(void)
1717 printk(KERN_INFO "C-EDF: %d cluster(s) of size = %d\n", 1717 printk(KERN_INFO "C-EDF: %d cluster(s) of size = %d\n",
1718 num_clusters, cluster_size); 1718 num_clusters, cluster_size);
1719 1719
1720 1720
1721#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) 1721#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD)
1722 num_gpu_clusters = min(num_clusters, num_online_gpus()); 1722 num_gpu_clusters = min(num_clusters, num_online_gpus());
1723 gpu_cluster_size = num_online_gpus() / num_gpu_clusters; 1723 gpu_cluster_size = num_online_gpus() / num_gpu_clusters;
1724 1724
1725 if (((num_online_gpus() % gpu_cluster_size) != 0) || 1725 if (((num_online_gpus() % gpu_cluster_size) != 0) ||
1726 (num_gpu_clusters != num_clusters)) { 1726 (num_gpu_clusters != num_clusters)) {
1727 printk(KERN_WARNING "C-EDF: GPUs not uniformly distributed among CPU clusters.\n"); 1727 printk(KERN_WARNING "C-EDF: GPUs not uniformly distributed among CPU clusters.\n");
1728 } 1728 }
1729#endif 1729#endif
1730 1730
1731 /* initialize clusters */ 1731 /* initialize clusters */
1732 cedf = kmalloc(num_clusters * sizeof(cedf_domain_t), GFP_ATOMIC); 1732 cedf = kmalloc(num_clusters * sizeof(cedf_domain_t), GFP_ATOMIC);
1733 for (i = 0; i < num_clusters; i++) { 1733 for (i = 0; i < num_clusters; i++) {