path: root/litmus/sched_cedf.c
author	Glenn Elliott <gelliott@cs.unc.edu>	2013-01-10 16:21:07 -0500
committer	Glenn Elliott <gelliott@cs.unc.edu>	2013-01-10 16:21:07 -0500
commit	8d00682ce5ddaedfb62287773d21c727f08fda69 (patch)
tree	61a4b7bac5960c6f0ab25fe087404e9ca1725e05 /litmus/sched_cedf.c
parent	fdf0a6b73001976c5d02d631ebdd0927819d7c91 (diff)
parent	1235a665a5e00dc762e6646c01381b3ed5019d86 (diff)
Merge branch 'wip-gpu-cleanup' into wip-2012.3-gpu
Conflicts:
	include/litmus/fpmath.h
	include/litmus/litmus.h
	include/litmus/rt_param.h
	include/litmus/trace.h
	kernel/sched.c
	kernel/softirq.c
	litmus/edf_common.c
	litmus/jobs.c
	litmus/litmus.c
	litmus/locking.c
	litmus/preempt.c
	litmus/sched_cedf.c
	litmus/sched_gsn_edf.c
	litmus/sched_litmus.c
	litmus/sync.c
Diffstat (limited to 'litmus/sched_cedf.c')
-rw-r--r--	litmus/sched_cedf.c	1138
1 files changed, 1084 insertions, 54 deletions
diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c
index b45b46fc4fca..db47f4413329 100644
--- a/litmus/sched_cedf.c
+++ b/litmus/sched_cedf.c
@@ -29,7 +29,7 @@
29#include <linux/percpu.h> 29#include <linux/percpu.h>
30#include <linux/sched.h> 30#include <linux/sched.h>
31#include <linux/slab.h> 31#include <linux/slab.h>
32 32#include <linux/uaccess.h>
33#include <linux/module.h> 33#include <linux/module.h>
34 34
35#include <litmus/litmus.h> 35#include <litmus/litmus.h>
@@ -43,14 +43,48 @@
43#include <litmus/clustered.h> 43#include <litmus/clustered.h>
44 44
45#include <litmus/bheap.h> 45#include <litmus/bheap.h>
46#include <litmus/binheap.h>
47#include <litmus/trace.h>
48
49#ifdef CONFIG_LITMUS_LOCKING
50#include <litmus/kfmlp_lock.h>
51#endif
52
53#ifdef CONFIG_LITMUS_NESTED_LOCKING
54#include <litmus/rsm_lock.h>
55#include <litmus/ikglp_lock.h>
56#endif
46 57
47#ifdef CONFIG_SCHED_CPU_AFFINITY 58#ifdef CONFIG_SCHED_CPU_AFFINITY
48#include <litmus/affinity.h> 59#include <litmus/affinity.h>
49#endif 60#endif
50 61
62#ifdef CONFIG_REALTIME_AUX_TASKS
63#include <litmus/aux_tasks.h>
64#endif
65
51/* to configure the cluster size */ 66/* to configure the cluster size */
52#include <litmus/litmus_proc.h> 67#include <litmus/litmus_proc.h>
53#include <linux/uaccess.h> 68
69#ifdef CONFIG_SCHED_CPU_AFFINITY
70#include <litmus/affinity.h>
71#endif
72
73#ifdef CONFIG_LITMUS_SOFTIRQD
74#include <litmus/litmus_softirq.h>
75#endif
76
77#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
78#include <linux/interrupt.h>
79#endif
80
81#ifdef CONFIG_LITMUS_NVIDIA
82#include <litmus/nvidia_info.h>
83#endif
84
85#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
86#include <litmus/gpu_affinity.h>
87#endif
54 88
55/* Reference configuration variable. Determines which cache level is used to 89/* Reference configuration variable. Determines which cache level is used to
56 * group CPUs into clusters. GLOBAL_CLUSTER, which is the default, means that 90 * group CPUs into clusters. GLOBAL_CLUSTER, which is the default, means that
@@ -71,7 +105,7 @@ typedef struct {
71 struct task_struct* linked; /* only RT tasks */ 105 struct task_struct* linked; /* only RT tasks */
72 struct task_struct* scheduled; /* only RT tasks */ 106 struct task_struct* scheduled; /* only RT tasks */
73 atomic_t will_schedule; /* prevent unneeded IPIs */ 107 atomic_t will_schedule; /* prevent unneeded IPIs */
74 struct bheap_node* hn; 108 struct binheap_node hn;
75} cpu_entry_t; 109} cpu_entry_t;
76 110
77/* one cpu_entry_t per CPU */ 111/* one cpu_entry_t per CPU */
@@ -97,10 +131,17 @@ typedef struct clusterdomain {
97 /* map of this cluster cpus */ 131 /* map of this cluster cpus */
98 cpumask_var_t cpu_map; 132 cpumask_var_t cpu_map;
99 /* the cpus queue themselves according to priority in here */ 133 /* the cpus queue themselves according to priority in here */
100 struct bheap_node *heap_node; 134 struct binheap cpu_heap;
101 struct bheap cpu_heap;
102 /* lock for this cluster */ 135 /* lock for this cluster */
103#define cluster_lock domain.ready_lock 136#define cluster_lock domain.ready_lock
137
138#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
139 struct tasklet_head pending_tasklets;
140#endif
141
142#ifdef CONFIG_LITMUS_DGL_SUPPORT
143 raw_spinlock_t dgl_lock;
144#endif
104} cedf_domain_t; 145} cedf_domain_t;
105 146
106/* a cedf_domain per cluster; allocation is done at init/activation time */ 147/* a cedf_domain per cluster; allocation is done at init/activation time */
@@ -109,6 +150,29 @@ cedf_domain_t *cedf;
109#define remote_cluster(cpu) ((cedf_domain_t *) per_cpu(cedf_cpu_entries, cpu).cluster) 150#define remote_cluster(cpu) ((cedf_domain_t *) per_cpu(cedf_cpu_entries, cpu).cluster)
110#define task_cpu_cluster(task) remote_cluster(get_partition(task)) 151#define task_cpu_cluster(task) remote_cluster(get_partition(task))
111 152
153/* total number of clusters */
154static int num_clusters;
155/* we do not support clusters of different sizes */
156static unsigned int cluster_size;
157
158static int clusters_allocated = 0;
159
160
161#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD)
162static int num_gpu_clusters;
163static unsigned int gpu_cluster_size;
164#endif
165
166
167#ifdef CONFIG_LITMUS_DGL_SUPPORT
168static raw_spinlock_t* cedf_get_dgl_spinlock(struct task_struct *t)
169{
170 cedf_domain_t *cluster = task_cpu_cluster(t);
171 return(&cluster->dgl_lock);
172}
173#endif
174
175
112/* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling 176/* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling
113 * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose 177 * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose
114 * information during the initialization of the plugin (e.g., topology) 178 * information during the initialization of the plugin (e.g., topology)
@@ -116,11 +180,11 @@ cedf_domain_t *cedf;
116 */ 180 */
117#define VERBOSE_INIT 181#define VERBOSE_INIT
118 182
119static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b) 183static int cpu_lower_prio(struct binheap_node *_a, struct binheap_node *_b)
120{ 184{
121 cpu_entry_t *a, *b; 185 cpu_entry_t *a = binheap_entry(_a, cpu_entry_t, hn);
122 a = _a->value; 186 cpu_entry_t *b = binheap_entry(_b, cpu_entry_t, hn);
123 b = _b->value; 187
124 /* Note that a and b are inverted: we want the lowest-priority CPU at 188 /* Note that a and b are inverted: we want the lowest-priority CPU at
125 * the top of the heap. 189 * the top of the heap.
126 */ 190 */
@@ -134,20 +198,17 @@ static void update_cpu_position(cpu_entry_t *entry)
134{ 198{
135 cedf_domain_t *cluster = entry->cluster; 199 cedf_domain_t *cluster = entry->cluster;
136 200
137 if (likely(bheap_node_in_heap(entry->hn))) 201 if (likely(binheap_is_in_heap(&entry->hn))) {
138 bheap_delete(cpu_lower_prio, 202 binheap_delete(&entry->hn, &cluster->cpu_heap);
139 &cluster->cpu_heap, 203 }
140 entry->hn);
141 204
142 bheap_insert(cpu_lower_prio, &cluster->cpu_heap, entry->hn); 205 binheap_add(&entry->hn, &cluster->cpu_heap, cpu_entry_t, hn);
143} 206}
144 207
145/* caller must hold cedf lock */ 208/* caller must hold cedf lock */
146static cpu_entry_t* lowest_prio_cpu(cedf_domain_t *cluster) 209static cpu_entry_t* lowest_prio_cpu(cedf_domain_t *cluster)
147{ 210{
148 struct bheap_node* hn; 211 return binheap_top_entry(&cluster->cpu_heap, cpu_entry_t, hn);
149 hn = bheap_peek(cpu_lower_prio, &cluster->cpu_heap);
150 return hn->value;
151} 212}
152 213
153 214
@@ -209,7 +270,7 @@ static noinline void link_task_to_cpu(struct task_struct* linked,
209} 270}
210 271
211/* unlink - Make sure a task is not linked any longer to an entry 272/* unlink - Make sure a task is not linked any longer to an entry
212 * where it was linked before. Must hold cedf_lock. 273 * where it was linked before. Must hold cluster_lock.
213 */ 274 */
214static noinline void unlink(struct task_struct* t) 275static noinline void unlink(struct task_struct* t)
215{ 276{
@@ -245,7 +306,7 @@ static void preempt(cpu_entry_t *entry)
245} 306}
246 307
247/* requeue - Put an unlinked task into gsn-edf domain. 308/* requeue - Put an unlinked task into gsn-edf domain.
248 * Caller must hold cedf_lock. 309 * Caller must hold cluster_lock.
249 */ 310 */
250static noinline void requeue(struct task_struct* task) 311static noinline void requeue(struct task_struct* task)
251{ 312{
@@ -255,7 +316,15 @@ static noinline void requeue(struct task_struct* task)
255 BUG_ON(is_queued(task)); 316 BUG_ON(is_queued(task));
256 317
257 if (is_released(task, litmus_clock())) 318 if (is_released(task, litmus_clock()))
258 __add_ready(&cluster->domain, task); 319#ifdef CONFIG_REALTIME_AUX_TASKS
320 if (unlikely(tsk_rt(task)->is_aux_task && !is_running(task))) {
321 /* aux_task probably transitioned to real-time while it was blocked */
322 TRACE_CUR("aux task %s/%d is not ready!\n", task->comm, task->pid);
323 unlink(task); /* really needed? */
324 }
325 else
326#endif
327 __add_ready(&cluster->domain, task);
259 else { 328 else {
260 /* it has got to wait */ 329 /* it has got to wait */
261 add_release(&cluster->domain, task); 330 add_release(&cluster->domain, task);
@@ -340,13 +409,17 @@ static void cedf_release_jobs(rt_domain_t* rt, struct bheap* tasks)
340 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); 409 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
341} 410}
342 411
343/* caller holds cedf_lock */ 412/* caller holds cluster_lock */
344static noinline void job_completion(struct task_struct *t, int forced) 413static noinline void job_completion(struct task_struct *t, int forced)
345{ 414{
346 BUG_ON(!t); 415 BUG_ON(!t);
347 416
348 sched_trace_task_completion(t, forced); 417 sched_trace_task_completion(t, forced);
349 418
419#ifdef CONFIG_LITMUS_NVIDIA
420 atomic_set(&tsk_rt(t)->nv_int_count, 0);
421#endif
422
350 TRACE_TASK(t, "job_completion().\n"); 423 TRACE_TASK(t, "job_completion().\n");
351 424
352 /* set flags */ 425 /* set flags */
@@ -371,25 +444,341 @@ static noinline void job_completion(struct task_struct *t, int forced)
371 */ 444 */
372static void cedf_tick(struct task_struct* t) 445static void cedf_tick(struct task_struct* t)
373{ 446{
374 if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) { 447 if (is_realtime(t) && budget_exhausted(t))
375 if (!is_np(t)) { 448 {
376 /* np tasks will be preempted when they become 449 if (budget_signalled(t) && !sigbudget_sent(t)) {
377 * preemptable again 450 /* signal exhaustion */
378 */ 451 send_sigbudget(t);
379 litmus_reschedule_local(); 452 }
380 set_will_schedule(); 453
381 TRACE("cedf_scheduler_tick: " 454 if (budget_enforced(t)) {
382 "%d is preemptable " 455 if (!is_np(t)) {
383 " => FORCE_RESCHED\n", t->pid); 456 /* np tasks will be preempted when they become
384 } else if (is_user_np(t)) { 457 * preemptable again
385 TRACE("cedf_scheduler_tick: " 458 */
386 "%d is non-preemptable, " 459 litmus_reschedule_local();
387 "preemption delayed.\n", t->pid); 460 set_will_schedule();
388 request_exit_np(t); 461 TRACE("cedf_scheduler_tick: "
462 "%d is preemptable "
463 " => FORCE_RESCHED\n", t->pid);
464 } else if (is_user_np(t)) {
465 TRACE("cedf_scheduler_tick: "
466 "%d is non-preemptable, "
467 "preemption delayed.\n", t->pid);
468 request_exit_np(t);
469 }
389 } 470 }
390 } 471 }
391} 472}
392 473
474
475
476
477
478
479
480
481
482
483
484
485
486#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
487
488
489static void __do_lit_tasklet(struct tasklet_struct* tasklet, unsigned long flushed)
490{
491 if (!atomic_read(&tasklet->count)) {
492 if(tasklet->owner) {
493 sched_trace_tasklet_begin(tasklet->owner);
494 }
495
496 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state))
497 {
498 BUG();
499 }
500 TRACE("%s: Invoking tasklet with owner pid = %d (flushed = %d).\n",
501 __FUNCTION__,
502 (tasklet->owner) ? tasklet->owner->pid : -1,
503 (tasklet->owner) ? 0 : 1);
504 tasklet->func(tasklet->data);
505 tasklet_unlock(tasklet);
506
507 if(tasklet->owner) {
508 sched_trace_tasklet_end(tasklet->owner, flushed);
509 }
510 }
511 else {
512 BUG();
513 }
514}
515
516
517static void do_lit_tasklets(cedf_domain_t* cluster, struct task_struct* sched_task)
518{
519 int work_to_do = 1;
520 struct tasklet_struct *tasklet = NULL;
521 unsigned long flags;
522
523 while(work_to_do) {
524
525 TS_NV_SCHED_BOTISR_START;
526
527 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
528
529 if(cluster->pending_tasklets.head != NULL) {
530 // remove tasklet at head.
531 struct tasklet_struct *prev = NULL;
532 tasklet = cluster->pending_tasklets.head;
533
534 // find a tasklet with prio to execute; skip ones where
535 // sched_task has a higher priority.
536 // We use the '!edf' test instead of swapping function arguments since
537 // both sched_task and owner could be NULL. In this case, we want to
538 // still execute the tasklet.
539 while(tasklet && !edf_higher_prio(tasklet->owner, sched_task)) {
540 prev = tasklet;
541 tasklet = tasklet->next;
542 }
543
544 if(tasklet) { // found something to execute
545 // remove the tasklet from the queue
546 if(prev) {
547 prev->next = tasklet->next;
548 if(prev->next == NULL) {
549 TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid);
550 cluster->pending_tasklets.tail = &(prev);
551 }
552 }
553 else {
554 cluster->pending_tasklets.head = tasklet->next;
555 if(tasklet->next == NULL) {
556 TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid);
557 cluster->pending_tasklets.tail = &(cluster->pending_tasklets.head);
558 }
559 }
560 }
561 else {
562 TRACE("%s: No tasklets with eligible priority.\n", __FUNCTION__);
563 }
564 }
565 else {
566 TRACE("%s: Tasklet queue is empty.\n", __FUNCTION__);
567 }
568
569 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
570
571 if(tasklet) {
572 __do_lit_tasklet(tasklet, 0ul);
573 tasklet = NULL;
574 }
575 else {
576 work_to_do = 0;
577 }
578
579 TS_NV_SCHED_BOTISR_END;
580 }
581}
582
583static void __add_pai_tasklet(struct tasklet_struct* tasklet, cedf_domain_t* cluster)
584{
585 struct tasklet_struct* step;
586
587 tasklet->next = NULL; // make sure there are no old values floating around
588
589 step = cluster->pending_tasklets.head;
590 if(step == NULL) {
591 TRACE("%s: tasklet queue empty. inserting tasklet for %d at head.\n", __FUNCTION__, tasklet->owner->pid);
592 // insert at tail.
593 *(cluster->pending_tasklets.tail) = tasklet;
594 cluster->pending_tasklets.tail = &(tasklet->next);
595 }
596 else if((*(cluster->pending_tasklets.tail) != NULL) &&
597 edf_higher_prio((*(cluster->pending_tasklets.tail))->owner, tasklet->owner)) {
598 // insert at tail.
599 TRACE("%s: tasklet belongs at end. inserting tasklet for %d at tail.\n", __FUNCTION__, tasklet->owner->pid);
600
601 *(cluster->pending_tasklets.tail) = tasklet;
602 cluster->pending_tasklets.tail = &(tasklet->next);
603 }
604 else {
605
606 // insert the tasklet somewhere in the middle.
607
608 TRACE("%s: tasklet belongs somewhere in the middle.\n", __FUNCTION__);
609
610 while(step->next && edf_higher_prio(step->next->owner, tasklet->owner)) {
611 step = step->next;
612 }
613
614 // insert tasklet right before step->next.
615
616 TRACE("%s: inserting tasklet for %d between %d and %d.\n", __FUNCTION__,
617 tasklet->owner->pid,
618 (step->owner) ?
619 step->owner->pid :
620 -1,
621 (step->next) ?
622 ((step->next->owner) ?
623 step->next->owner->pid :
624 -1) :
625 -1);
626
627 tasklet->next = step->next;
628 step->next = tasklet;
629
630 // patch up the head if needed.
631 if(cluster->pending_tasklets.head == step)
632 {
633 TRACE("%s: %d is the new tasklet queue head.\n", __FUNCTION__, tasklet->owner->pid);
634 cluster->pending_tasklets.head = tasklet;
635 }
636 }
637}
638
639static void cedf_run_tasklets(struct task_struct* sched_task)
640{
641 cedf_domain_t* cluster;
642
643 preempt_disable();
644
645 cluster = (is_realtime(sched_task)) ?
646 task_cpu_cluster(sched_task) :
647 remote_cluster(smp_processor_id());
648
649 if(cluster && cluster->pending_tasklets.head != NULL) {
650 TRACE("%s: There are tasklets to process.\n", __FUNCTION__);
651 do_lit_tasklets(cluster, sched_task);
652 }
653
654 preempt_enable_no_resched();
655}
656
657
658
659static int cedf_enqueue_pai_tasklet(struct tasklet_struct* tasklet)
660{
661#if 0
662 cedf_domain_t *cluster = NULL;
663 cpu_entry_t *targetCPU = NULL;
664 int thisCPU;
665 int runLocal = 0;
666 int runNow = 0;
667 unsigned long flags;
668
669 if(unlikely((tasklet->owner == NULL) || !is_realtime(tasklet->owner)))
670 {
671 TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
672 return 0;
673 }
674
675 cluster = task_cpu_cluster(tasklet->owner);
676
677 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
678
679 thisCPU = smp_processor_id();
680
681#ifdef CONFIG_SCHED_CPU_AFFINITY
682 {
683 cpu_entry_t* affinity = NULL;
684
685 // use this CPU if it is in our cluster and isn't running any RT work.
686 if(cpu_isset(thisCPU, *cluster->cpu_map) && (__get_cpu_var(cedf_cpu_entries).linked == NULL)) {
687 affinity = &(__get_cpu_var(cedf_cpu_entries));
688 }
689 else {
690 // this CPU is busy or shouldn't run the tasklet in this cluster.
691 // look for available nearby CPUs.
692 // NOTE: Affinity towards owner and not this CPU. Is this right?
693 affinity =
694 cedf_get_nearest_available_cpu(cluster,
695 &per_cpu(cedf_cpu_entries, task_cpu(tasklet->owner)));
696 }
697
698 targetCPU = affinity;
699 }
700#endif
701
702 if (targetCPU == NULL) {
703 targetCPU = lowest_prio_cpu(cluster);
704 }
705
706 if (edf_higher_prio(tasklet->owner, targetCPU->linked)) {
707 if (thisCPU == targetCPU->cpu) {
708 TRACE("%s: Run tasklet locally (and now).\n", __FUNCTION__);
709 runLocal = 1;
710 runNow = 1;
711 }
712 else {
713 TRACE("%s: Run tasklet remotely (and now).\n", __FUNCTION__);
714 runLocal = 0;
715 runNow = 1;
716 }
717 }
718 else {
719 runLocal = 0;
720 runNow = 0;
721 }
722
723 if(!runLocal) {
724 // enqueue the tasklet
725 __add_pai_tasklet(tasklet, cluster);
726 }
727
728 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
729
730
731 if (runLocal /*&& runNow */) { // runNow == 1 is implied
732 TRACE("%s: Running tasklet on CPU where it was received.\n", __FUNCTION__);
733 __do_lit_tasklet(tasklet, 0ul);
734 }
735 else if (runNow /*&& !runLocal */) { // runLocal == 0 is implied
736 TRACE("%s: Triggering CPU %d to run tasklet.\n", __FUNCTION__, targetCPU->cpu);
737 preempt(targetCPU); // need to be protected by cluster_lock?
738 }
739 else {
740 TRACE("%s: Scheduling of tasklet was deferred.\n", __FUNCTION__);
741 }
742#else
743 TRACE("%s: Running tasklet on CPU where it was received.\n", __FUNCTION__);
744 __do_lit_tasklet(tasklet, 0ul);
745#endif
746 return(1); // success
747}
748
749static void cedf_change_prio_pai_tasklet(struct task_struct *old_prio,
750 struct task_struct *new_prio)
751{
752 struct tasklet_struct* step;
753 unsigned long flags;
754 cedf_domain_t *cluster;
755 struct task_struct *probe;
756
757 // identify the cluster by the assignment of these tasks. one should
758 // be non-NULL.
759 probe = (old_prio) ? old_prio : new_prio;
760
761 if(probe) {
762 cluster = task_cpu_cluster(probe);
763
764 if(cluster->pending_tasklets.head != NULL) {
765 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
766 for(step = cluster->pending_tasklets.head; step != NULL; step = step->next) {
767 if(step->owner == old_prio) {
768 TRACE("%s: Found tasklet to change: %d\n", __FUNCTION__, step->owner->pid);
769 step->owner = new_prio;
770 }
771 }
772 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
773 }
774 }
775 else {
776 TRACE("%s: Both priorities were NULL\n", __FUNCTION__);
777 }
778}
779
780#endif // PAI
781
393/* Getting schedule() right is a bit tricky. schedule() may not make any 782/* Getting schedule() right is a bit tricky. schedule() may not make any
394 * assumptions on the state of the current task since it may be called for a 783 * assumptions on the state of the current task since it may be called for a
395 * number of reasons. The reasons include a scheduler_tick() determined that it 784 * number of reasons. The reasons include a scheduler_tick() determined that it
@@ -415,7 +804,7 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
415{ 804{
416 cpu_entry_t* entry = &__get_cpu_var(cedf_cpu_entries); 805 cpu_entry_t* entry = &__get_cpu_var(cedf_cpu_entries);
417 cedf_domain_t *cluster = entry->cluster; 806 cedf_domain_t *cluster = entry->cluster;
418 int out_of_time, sleep, preempt, np, exists, blocks; 807 int out_of_time, signal_budget, sleep, preempt, np, exists, blocks;
419 struct task_struct* next = NULL; 808 struct task_struct* next = NULL;
420 809
421#ifdef CONFIG_RELEASE_MASTER 810#ifdef CONFIG_RELEASE_MASTER
@@ -442,6 +831,10 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
442 out_of_time = exists && 831 out_of_time = exists &&
443 budget_enforced(entry->scheduled) && 832 budget_enforced(entry->scheduled) &&
444 budget_exhausted(entry->scheduled); 833 budget_exhausted(entry->scheduled);
834 signal_budget = exists &&
835 budget_signalled(entry->scheduled) &&
836 budget_exhausted(entry->scheduled) &&
837 !sigbudget_sent(entry->scheduled);
445 np = exists && is_np(entry->scheduled); 838 np = exists && is_np(entry->scheduled);
446 sleep = exists && is_completed(entry->scheduled); 839 sleep = exists && is_completed(entry->scheduled);
447 preempt = entry->scheduled != entry->linked; 840 preempt = entry->scheduled != entry->linked;
@@ -460,12 +853,28 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
460 TRACE_TASK(prev, "will be preempted by %s/%d\n", 853 TRACE_TASK(prev, "will be preempted by %s/%d\n",
461 entry->linked->comm, entry->linked->pid); 854 entry->linked->comm, entry->linked->pid);
462 855
856 /* Send the signal that the budget has been exhausted */
857 if (signal_budget)
858 send_sigbudget(entry->scheduled);
463 859
464 /* If a task blocks we have no choice but to reschedule. 860 /* If a task blocks we have no choice but to reschedule.
465 */ 861 */
466 if (blocks) 862 if (blocks)
467 unlink(entry->scheduled); 863 unlink(entry->scheduled);
468 864
865#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING)
866 if(exists && is_realtime(entry->scheduled) && tsk_rt(entry->scheduled)->held_gpus) {
867 if(!blocks || tsk_rt(entry->scheduled)->suspend_gpu_tracker_on_block) {
868 // don't track preemptions or locking protocol suspensions.
869 TRACE_TASK(entry->scheduled, "stopping GPU tracker.\n");
870 stop_gpu_tracker(entry->scheduled);
871 }
872 else if(blocks && !tsk_rt(entry->scheduled)->suspend_gpu_tracker_on_block) {
873 TRACE_TASK(entry->scheduled, "GPU tracker remains on during suspension.\n");
874 }
875 }
876#endif
877
469 /* Request a sys_exit_np() call if we would like to preempt but cannot. 878 /* Request a sys_exit_np() call if we would like to preempt but cannot.
470 * We need to make sure to update the link structure anyway in case 879 * We need to make sure to update the link structure anyway in case
471 * that we are still linked. Multiple calls to request_exit_np() don't 880 * that we are still linked. Multiple calls to request_exit_np() don't
@@ -515,7 +924,7 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
515 raw_spin_unlock(&cluster->cluster_lock); 924 raw_spin_unlock(&cluster->cluster_lock);
516 925
517#ifdef WANT_ALL_SCHED_EVENTS 926#ifdef WANT_ALL_SCHED_EVENTS
518 TRACE("cedf_lock released, next=0x%p\n", next); 927 TRACE("cluster_lock released, next=0x%p\n", next);
519 928
520 if (next) 929 if (next)
521 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock()); 930 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
@@ -523,7 +932,6 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
523 TRACE("becomes idle at %llu.\n", litmus_clock()); 932 TRACE("becomes idle at %llu.\n", litmus_clock());
524#endif 933#endif
525 934
526
527 return next; 935 return next;
528} 936}
529 937
@@ -549,7 +957,7 @@ static void cedf_task_new(struct task_struct * t, int on_rq, int running)
549 cpu_entry_t* entry; 957 cpu_entry_t* entry;
550 cedf_domain_t* cluster; 958 cedf_domain_t* cluster;
551 959
552 TRACE("gsn edf: task new %d\n", t->pid); 960 TRACE("c-edf: task new %d\n", t->pid);
553 961
554 /* the cluster doesn't change even if t is running */ 962 /* the cluster doesn't change even if t is running */
555 cluster = task_cpu_cluster(t); 963 cluster = task_cpu_cluster(t);
@@ -587,7 +995,7 @@ static void cedf_task_new(struct task_struct * t, int on_rq, int running)
587static void cedf_task_wake_up(struct task_struct *task) 995static void cedf_task_wake_up(struct task_struct *task)
588{ 996{
589 unsigned long flags; 997 unsigned long flags;
590 lt_t now; 998 //lt_t now;
591 cedf_domain_t *cluster; 999 cedf_domain_t *cluster;
592 1000
593 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock()); 1001 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
@@ -595,6 +1003,9 @@ static void cedf_task_wake_up(struct task_struct *task)
595 cluster = task_cpu_cluster(task); 1003 cluster = task_cpu_cluster(task);
596 1004
597 raw_spin_lock_irqsave(&cluster->cluster_lock, flags); 1005 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
1006
1007#if 0
1008 /* sporadic task model. will increment job numbers automatically */
598 now = litmus_clock(); 1009 now = litmus_clock();
599 if (is_tardy(task, now)) { 1010 if (is_tardy(task, now)) {
600 /* new sporadic release */ 1011 /* new sporadic release */
@@ -608,6 +1019,26 @@ static void cedf_task_wake_up(struct task_struct *task)
608 tsk_rt(task)->completed = 0; 1019 tsk_rt(task)->completed = 0;
609 } 1020 }
610 } 1021 }
1022#else
1023 /* periodic task model. don't force job to end.
1024 * rely on user to say when jobs complete or when budget expires. */
1025 tsk_rt(task)->completed = 0;
1026#endif
1027
1028#ifdef CONFIG_REALTIME_AUX_TASKS
1029 if (tsk_rt(task)->has_aux_tasks && !tsk_rt(task)->hide_from_aux_tasks) {
1030 TRACE_CUR("%s/%d is ready so aux tasks may not inherit.\n", task->comm, task->pid);
1031 disable_aux_task_owner(task);
1032 }
1033#endif
1034
1035#ifdef CONFIG_LITMUS_NVIDIA
1036 if (tsk_rt(task)->held_gpus && !tsk_rt(task)->hide_from_gpu) {
1037 TRACE_CUR("%s/%d is ready so gpu klmirqd tasks may not inherit.\n", task->comm, task->pid);
1038 disable_gpu_owner(task);
1039 }
1040#endif
1041
611 cedf_job_arrival(task); 1042 cedf_job_arrival(task);
612 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); 1043 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
613} 1044}
@@ -623,7 +1054,25 @@ static void cedf_task_block(struct task_struct *t)
623 1054
624 /* unlink if necessary */ 1055 /* unlink if necessary */
625 raw_spin_lock_irqsave(&cluster->cluster_lock, flags); 1056 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
1057
626 unlink(t); 1058 unlink(t);
1059
1060#ifdef CONFIG_REALTIME_AUX_TASKS
1061 if (tsk_rt(t)->has_aux_tasks && !tsk_rt(t)->hide_from_aux_tasks) {
1062
1063 TRACE_CUR("%s/%d is blocked so aux tasks may inherit.\n", t->comm, t->pid);
1064 enable_aux_task_owner(t);
1065 }
1066#endif
1067
1068#ifdef CONFIG_LITMUS_NVIDIA
1069 if (tsk_rt(t)->held_gpus && !tsk_rt(t)->hide_from_gpu) {
1070
1071 TRACE_CUR("%s/%d is blocked so gpu klmirqd tasks may inherit.\n", t->comm, t->pid);
1072 enable_gpu_owner(t);
1073 }
1074#endif
1075
627 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); 1076 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
628 1077
629 BUG_ON(!is_realtime(t)); 1078 BUG_ON(!is_realtime(t));
@@ -635,8 +1084,30 @@ static void cedf_task_exit(struct task_struct * t)
635 unsigned long flags; 1084 unsigned long flags;
636 cedf_domain_t *cluster = task_cpu_cluster(t); 1085 cedf_domain_t *cluster = task_cpu_cluster(t);
637 1086
1087#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
1088 cedf_change_prio_pai_tasklet(t, NULL);
1089#endif
1090
638 /* unlink if necessary */ 1091 /* unlink if necessary */
639 raw_spin_lock_irqsave(&cluster->cluster_lock, flags); 1092 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
1093
1094#ifdef CONFIG_REALTIME_AUX_TASKS
1095 /* make sure we clean up on our way out */
1096 if (unlikely(tsk_rt(t)->is_aux_task)) {
1097 exit_aux_task(t);
1098 }
1099 else if(tsk_rt(t)->has_aux_tasks) {
1100 disable_aux_task_owner(t);
1101 }
1102#endif
1103
1104#ifdef CONFIG_LITMUS_NVIDIA
1105 /* make sure we clean up on our way out */
1106 if(tsk_rt(t)->held_gpus) {
1107 disable_gpu_owner(t);
1108 }
1109#endif
1110
640 unlink(t); 1111 unlink(t);
641 if (tsk_rt(t)->scheduled_on != NO_CPU) { 1112 if (tsk_rt(t)->scheduled_on != NO_CPU) {
642 cpu_entry_t *cpu; 1113 cpu_entry_t *cpu;
@@ -652,13 +1123,505 @@ static void cedf_task_exit(struct task_struct * t)
652 1123
653static long cedf_admit_task(struct task_struct* tsk) 1124static long cedf_admit_task(struct task_struct* tsk)
654{ 1125{
1126#ifdef CONFIG_LITMUS_NESTED_LOCKING
1127 INIT_BINHEAP_HANDLE(&tsk_rt(tsk)->hp_blocked_tasks,
1128 edf_max_heap_base_priority_order);
1129#endif
1130
655 return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL; 1131 return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL;
656} 1132}
657 1133
658/* total number of cluster */ 1134
659static int num_clusters; 1135
660/* we do not support cluster of different sizes */ 1136#ifdef CONFIG_LITMUS_LOCKING
661static unsigned int cluster_size; 1137
1138#include <litmus/fdso.h>
1139
1140
1141
1142/* called with IRQs off */
1143static int __increase_priority_inheritance(struct task_struct* t,
1144 struct task_struct* prio_inh)
1145{
1146 int success = 1;
1147 int linked_on;
1148 int check_preempt = 0;
1149 cedf_domain_t* cluster;
1150
1151 if (prio_inh && prio_inh == effective_priority(t)) {
1152 /* relationship already established. */
1153 TRACE_TASK(t, "already has effective priority of %s/%d\n",
1154 prio_inh->comm, prio_inh->pid);
1155 goto out;
1156 }
1157
1158 cluster = task_cpu_cluster(t);
1159
1160#ifdef CONFIG_LITMUS_NESTED_LOCKING
1161 /* this sanity check allows for weaker locking in protocols */
1162 /* TODO (klmirqd): Skip this check if 't' is a proxy thread (???) */
1163 if(__edf_higher_prio(prio_inh, BASE, t, EFFECTIVE)) {
1164#endif
1165 TRACE_TASK(t, "inherits priority from %s/%d\n",
1166 prio_inh->comm, prio_inh->pid);
1167 tsk_rt(t)->inh_task = prio_inh;
1168
1169 linked_on = tsk_rt(t)->linked_on;
1170
1171 /* If it is scheduled, then we need to reorder the CPU heap. */
1172 if (linked_on != NO_CPU) {
1173 TRACE_TASK(t, "%s: linked on %d\n",
1174 __FUNCTION__, linked_on);
1175 /* Holder is scheduled; need to re-order CPUs.
1176 * We can't use heap_decrease() here since
1177 * the cpu_heap is ordered in reverse direction, so
1178 * it is actually an increase. */
1179 binheap_delete(&per_cpu(cedf_cpu_entries, linked_on).hn,
1180 &cluster->cpu_heap);
1181 binheap_add(&per_cpu(cedf_cpu_entries, linked_on).hn,
1182 &cluster->cpu_heap, cpu_entry_t, hn);
1183
1184 } else {
1185 /* holder may be queued: first stop queue changes */
1186 raw_spin_lock(&cluster->domain.release_lock);
1187 if (is_queued(t)) {
1188 TRACE_TASK(t, "%s: is queued\n",
1189 __FUNCTION__);
1190 /* We need to update the position of holder in some
1191 * heap. Note that this could be a release heap if
1192 * budget enforcement is used and this job overran. */
1193 check_preempt =
1194 !bheap_decrease(edf_ready_order, tsk_rt(t)->heap_node);
1195 } else {
1196 /* Nothing to do: if it is not queued and not linked
1197 * then it is either sleeping or currently being moved
1198 * by other code (e.g., a timer interrupt handler) that
1199 * will use the correct priority when enqueuing the
1200 * task. */
1201 TRACE_TASK(t, "%s: is NOT queued => Done.\n",
1202 __FUNCTION__);
1203 }
1204 raw_spin_unlock(&cluster->domain.release_lock);
1205
1206 /* If holder was enqueued in a release heap, then the following
1207 * preemption check is pointless, but we can't easily detect
1208 * that case. If you want to fix this, then consider that
1209 * simply adding a state flag requires O(n) time to update when
1210 * releasing n tasks, which conflicts with the goal to have
1211 * O(log n) merges. */
1212 if (check_preempt) {
1213 /* heap_decrease() hit the top level of the heap: make
1214 * sure preemption checks get the right task, not the
1215 * potentially stale cache. */
1216 bheap_uncache_min(edf_ready_order,
1217 &cluster->domain.ready_queue);
1218 check_for_preemptions(cluster);
1219 }
1220
1221#ifdef CONFIG_REALTIME_AUX_TASKS
1222 /* propagate to aux tasks */
1223 if (tsk_rt(t)->has_aux_tasks) {
1224 aux_task_owner_increase_priority(t);
1225 }
1226#endif
1227
1228#ifdef CONFIG_LITMUS_NVIDIA
1229 /* propagate to gpu klmirqd */
1230 if (tsk_rt(t)->held_gpus) {
1231 gpu_owner_increase_priority(t);
1232 }
1233#endif
1234 }
1235#ifdef CONFIG_LITMUS_NESTED_LOCKING
1236 }
1237 else {
1238 TRACE_TASK(t, "Spurious invalid priority increase. "
1239 "Inheritance request: %s/%d [eff_prio = %s/%d] to inherit from %s/%d\n"
1240 "Occurance is likely okay: probably due to (hopefully safe) concurrent priority updates.\n",
1241 t->comm, t->pid,
1242 effective_priority(t)->comm, effective_priority(t)->pid,
1243 (prio_inh) ? prio_inh->comm : "nil",
1244 (prio_inh) ? prio_inh->pid : -1);
1245 WARN_ON(!prio_inh);
1246 success = 0;
1247 }
1248#endif
1249
1250out:
1251 return success;
1252}
1253
1254/* called with IRQs off */
1255static void increase_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
1256{
1257 cedf_domain_t* cluster = task_cpu_cluster(t);
1258
1259 raw_spin_lock(&cluster->cluster_lock);
1260
1261 __increase_priority_inheritance(t, prio_inh);
1262
1263 raw_spin_unlock(&cluster->cluster_lock);
1264
1265#if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA)
1266 if(tsk_rt(t)->held_gpus) {
1267 int i;
1268 for(i = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus));
1269 i < NV_DEVICE_NUM;
1270 i = find_next_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus), i+1)) {
1271 pai_check_priority_increase(t, i);
1272 }
1273 }
1274#endif
1275}
1276
1277/* called with IRQs off */
1278static int __decrease_priority_inheritance(struct task_struct* t,
1279 struct task_struct* prio_inh)
1280{
1281 int success = 1;
1282
1283 if (prio_inh == tsk_rt(t)->inh_task) {
1284 /* relationship already established. */
1285 TRACE_TASK(t, "already inherits priority from %s/%d\n",
1286 (prio_inh) ? prio_inh->comm : "(nil)",
1287 (prio_inh) ? prio_inh->pid : 0);
1288 goto out;
1289 }
1290
1291#ifdef CONFIG_LITMUS_NESTED_LOCKING
1292 if(__edf_higher_prio(t, EFFECTIVE, prio_inh, BASE)) {
1293#endif
1294 /* A job only stops inheriting a priority when it releases a
1295 * resource. Thus we can make the following assumption.*/
1296 if(prio_inh)
1297 TRACE_TASK(t, "EFFECTIVE priority decreased to %s/%d\n",
1298 prio_inh->comm, prio_inh->pid);
1299 else
1300 TRACE_TASK(t, "base priority restored.\n");
1301
1302 tsk_rt(t)->inh_task = prio_inh;
1303
1304 if(tsk_rt(t)->scheduled_on != NO_CPU) {
1305 TRACE_TASK(t, "is scheduled.\n");
1306
1307 /* Check if rescheduling is necessary. We can't use heap_decrease()
1308 * since the priority was effectively lowered. */
1309 unlink(t);
1310 cedf_job_arrival(t);
1311 }
1312 else {
1313 cedf_domain_t* cluster = task_cpu_cluster(t);
1314 /* task is queued */
1315 raw_spin_lock(&cluster->domain.release_lock);
1316 if (is_queued(t)) {
1317 TRACE_TASK(t, "is queued.\n");
1318
1319 /* decrease in priority, so we have to re-add to binomial heap */
1320 unlink(t);
1321 cedf_job_arrival(t);
1322 }
1323 else {
1324 TRACE_TASK(t, "is not in scheduler. Probably on wait queue somewhere.\n");
1325 }
1326 raw_spin_unlock(&cluster->domain.release_lock);
1327 }
1328
1329#ifdef CONFIG_REALTIME_AUX_TASKS
1330 /* propagate to aux tasks */
1331 if (tsk_rt(t)->has_aux_tasks) {
1332 aux_task_owner_decrease_priority(t);
1333 }
1334#endif
1335
1336#ifdef CONFIG_LITMUS_NVIDIA
1337 /* propagate to gpu */
1338 if (tsk_rt(t)->held_gpus) {
1339 gpu_owner_decrease_priority(t);
1340 }
1341#endif
1342
1343#ifdef CONFIG_LITMUS_NESTED_LOCKING
1344 }
1345 else {
1346 TRACE_TASK(t, "Spurious invalid priority decrease. "
1347 "Inheritance request: %s/%d [eff_prio = %s/%d] to inherit from %s/%d\n"
1348 "Occurance is likely okay: probably due to (hopefully safe) concurrent priority updates.\n",
1349 t->comm, t->pid,
1350 effective_priority(t)->comm, effective_priority(t)->pid,
1351 (prio_inh) ? prio_inh->comm : "nil",
1352 (prio_inh) ? prio_inh->pid : -1);
1353 success = 0;
1354 }
1355#endif
1356
1357out:
1358 return success;
1359}
1360
1361static void decrease_priority_inheritance(struct task_struct* t,
1362 struct task_struct* prio_inh)
1363{
1364 cedf_domain_t* cluster = task_cpu_cluster(t);
1365
1366 raw_spin_lock(&cluster->cluster_lock);
1367 __decrease_priority_inheritance(t, prio_inh);
1368
1369 raw_spin_unlock(&cluster->cluster_lock);
1370
1371#if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA)
1372 if(tsk_rt(t)->held_gpus) {
1373 int i;
1374 for(i = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus));
1375 i < NV_DEVICE_NUM;
1376 i = find_next_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus), i+1)) {
1377 pai_check_priority_decrease(t, i);
1378 }
1379 }
1380#endif
1381}
1382
1383
1384#ifdef CONFIG_LITMUS_NESTED_LOCKING
1385
1386/* called with IRQs off */
1387/* preconditions:
1388 (1) The 'hp_blocked_tasks_lock' of task 't' is held.
1389 (2) The lock 'to_unlock' is held.
1390 */
1391static void nested_increase_priority_inheritance(struct task_struct* t,
1392 struct task_struct* prio_inh,
1393 raw_spinlock_t *to_unlock,
1394 unsigned long irqflags)
1395{
1396 struct litmus_lock *blocked_lock = tsk_rt(t)->blocked_lock;
1397
1398 if(tsk_rt(t)->inh_task != prio_inh) { // shield redundant calls.
1399 increase_priority_inheritance(t, prio_inh); // increase our prio.
1400 }
1401
1402 raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); // unlock the t's heap.
1403
1404
1405 if(blocked_lock) {
1406 if(blocked_lock->ops->propagate_increase_inheritance) {
1407 TRACE_TASK(t, "Inheritor is blocked (...perhaps). Checking lock %d.\n",
1408 blocked_lock->ident);
1409
1410 // beware: recursion
1411 blocked_lock->ops->propagate_increase_inheritance(blocked_lock,
1412 t, to_unlock,
1413 irqflags);
1414 }
1415 else {
1416 TRACE_TASK(t, "Inheritor is blocked on lock (%d) that does not support nesting!\n",
1417 blocked_lock->ident);
1418 unlock_fine_irqrestore(to_unlock, irqflags);
1419 }
1420 }
1421 else {
1422 TRACE_TASK(t, "is not blocked. No propagation.\n");
1423 unlock_fine_irqrestore(to_unlock, irqflags);
1424 }
1425}
1426
1427/* called with IRQs off */
1428/* preconditions:
1429 (1) The 'hp_blocked_tasks_lock' of task 't' is held.
1430 (2) The lock 'to_unlock' is held.
1431 */
1432static void nested_decrease_priority_inheritance(struct task_struct* t,
1433 struct task_struct* prio_inh,
1434 raw_spinlock_t *to_unlock,
1435 unsigned long irqflags)
1436{
1437 struct litmus_lock *blocked_lock = tsk_rt(t)->blocked_lock;
1438 decrease_priority_inheritance(t, prio_inh);
1439
1440 raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); // unlock the t's heap.
1441
1442 if(blocked_lock) {
1443 if(blocked_lock->ops->propagate_decrease_inheritance) {
1444 TRACE_TASK(t, "Inheritor is blocked (...perhaps). Checking lock %d.\n",
1445 blocked_lock->ident);
1446
1447 // beware: recursion
1448 blocked_lock->ops->propagate_decrease_inheritance(blocked_lock, t,
1449 to_unlock,
1450 irqflags);
1451 }
1452 else {
1453 TRACE_TASK(t, "Inheritor is blocked on lock (%p) that does not support nesting!\n",
1454 blocked_lock);
1455 unlock_fine_irqrestore(to_unlock, irqflags);
1456 }
1457 }
1458 else {
1459 TRACE_TASK(t, "is not blocked. No propagation.\n");
1460 unlock_fine_irqrestore(to_unlock, irqflags);
1461 }
1462}
1463
1464
1465/* ******************** RSM MUTEX ********************** */
1466
1467static struct litmus_lock_ops cedf_rsm_mutex_lock_ops = {
1468 .lock = rsm_mutex_lock,
1469 .unlock = rsm_mutex_unlock,
1470 .close = rsm_mutex_close,
1471 .deallocate = rsm_mutex_free,
1472
1473 .propagate_increase_inheritance = rsm_mutex_propagate_increase_inheritance,
1474 .propagate_decrease_inheritance = rsm_mutex_propagate_decrease_inheritance,
1475
1476#ifdef CONFIG_LITMUS_DGL_SUPPORT
1477 .dgl_lock = rsm_mutex_dgl_lock,
1478 .is_owner = rsm_mutex_is_owner,
1479 .enable_priority = rsm_mutex_enable_priority,
1480#endif
1481};
1482
1483static struct litmus_lock* cedf_new_rsm_mutex(void)
1484{
1485 return rsm_mutex_new(&cedf_rsm_mutex_lock_ops);
1486}
1487
1488/* ******************** IKGLP ********************** */
1489
1490static struct litmus_lock_ops cedf_ikglp_lock_ops = {
1491 .lock = ikglp_lock,
1492 .unlock = ikglp_unlock,
1493 .close = ikglp_close,
1494 .deallocate = ikglp_free,
1495
1496 // ikglp can only be an outer-most lock.
1497 .propagate_increase_inheritance = NULL,
1498 .propagate_decrease_inheritance = NULL,
1499};
1500
1501static struct litmus_lock* cedf_new_ikglp(void* __user arg)
1502{
1503 // assumes clusters of uniform size.
1504 return ikglp_new(cluster_size/num_clusters, &cedf_ikglp_lock_ops, arg);
1505}
1506
1507#endif /* CONFIG_LITMUS_NESTED_LOCKING */
1508
1509
1510
1511
1512/* ******************** KFMLP support ********************** */
1513
1514static struct litmus_lock_ops cedf_kfmlp_lock_ops = {
1515 .lock = kfmlp_lock,
1516 .unlock = kfmlp_unlock,
1517 .close = kfmlp_close,
1518 .deallocate = kfmlp_free,
1519
1520 // kfmlp can only be an outer-most lock.
1521 .propagate_increase_inheritance = NULL,
1522 .propagate_decrease_inheritance = NULL,
1523};
1524
1525
1526static struct litmus_lock* cedf_new_kfmlp(void* __user arg)
1527{
1528 return kfmlp_new(&cedf_kfmlp_lock_ops, arg);
1529}
1530
1531
1532/* **** lock constructor **** */
1533
1534static long cedf_allocate_lock(struct litmus_lock **lock, int type,
1535 void* __user args)
1536{
1537 int err;
1538
1539 switch (type) {
1540#ifdef CONFIG_LITMUS_NESTED_LOCKING
1541 case RSM_MUTEX:
1542 *lock = cedf_new_rsm_mutex();
1543 break;
1544
1545 case IKGLP_SEM:
1546 *lock = cedf_new_ikglp(args);
1547 break;
1548#endif
1549 case KFMLP_SEM:
1550 *lock = cedf_new_kfmlp(args);
1551 break;
1552
1553 default:
1554 err = -ENXIO;
1555 goto UNSUPPORTED_LOCK;
1556 };
1557
1558 if (*lock)
1559 err = 0;
1560 else
1561 err = -ENOMEM;
1562
1563UNSUPPORTED_LOCK:
1564 return err;
1565}
1566
1567#endif // CONFIG_LITMUS_LOCKING
1568
1569
1570#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1571static struct affinity_observer_ops cedf_kfmlp_affinity_ops = {
1572 .close = kfmlp_aff_obs_close,
1573 .deallocate = kfmlp_aff_obs_free,
1574};
1575
1576#ifdef CONFIG_LITMUS_NESTED_LOCKING
1577static struct affinity_observer_ops cedf_ikglp_affinity_ops = {
1578 .close = ikglp_aff_obs_close,
1579 .deallocate = ikglp_aff_obs_free,
1580};
1581#endif
1582
1583static long cedf_allocate_affinity_observer(struct affinity_observer **aff_obs,
1584 int type,
1585 void* __user args)
1586{
1587 int err;
1588
1589 switch (type) {
1590
1591 case KFMLP_SIMPLE_GPU_AFF_OBS:
1592 *aff_obs = kfmlp_simple_gpu_aff_obs_new(&cedf_kfmlp_affinity_ops, args);
1593 break;
1594
1595 case KFMLP_GPU_AFF_OBS:
1596 *aff_obs = kfmlp_gpu_aff_obs_new(&cedf_kfmlp_affinity_ops, args);
1597 break;
1598
1599#ifdef CONFIG_LITMUS_NESTED_LOCKING
1600 case IKGLP_SIMPLE_GPU_AFF_OBS:
1601 *aff_obs = ikglp_simple_gpu_aff_obs_new(&cedf_ikglp_affinity_ops, args);
1602 break;
1603
1604 case IKGLP_GPU_AFF_OBS:
1605 *aff_obs = ikglp_gpu_aff_obs_new(&cedf_ikglp_affinity_ops, args);
1606 break;
1607#endif
1608 default:
1609 err = -ENXIO;
1610 goto UNSUPPORTED_AFF_OBS;
1611 };
1612
1613 if (*aff_obs)
1614 err = 0;
1615 else
1616 err = -ENOMEM;
1617
1618UNSUPPORTED_AFF_OBS:
1619 return err;
1620}
1621#endif
1622
1623
1624
662 1625
663#ifdef VERBOSE_INIT 1626#ifdef VERBOSE_INIT
664static void print_cluster_topology(cpumask_var_t mask, int cpu) 1627static void print_cluster_topology(cpumask_var_t mask, int cpu)
@@ -673,16 +1636,17 @@ static void print_cluster_topology(cpumask_var_t mask, int cpu)
673} 1636}
674#endif 1637#endif
675 1638
676static int clusters_allocated = 0;
677
678static void cleanup_cedf(void) 1639static void cleanup_cedf(void)
679{ 1640{
680 int i; 1641 int i;
681 1642
1643#ifdef CONFIG_LITMUS_NVIDIA
1644 shutdown_nvidia_info();
1645#endif
1646
682 if (clusters_allocated) { 1647 if (clusters_allocated) {
683 for (i = 0; i < num_clusters; i++) { 1648 for (i = 0; i < num_clusters; i++) {
684 kfree(cedf[i].cpus); 1649 kfree(cedf[i].cpus);
685 kfree(cedf[i].heap_node);
686 free_cpumask_var(cedf[i].cpu_map); 1650 free_cpumask_var(cedf[i].cpu_map);
687 } 1651 }
688 1652
@@ -690,6 +1654,18 @@ static void cleanup_cedf(void)
690 } 1654 }
691} 1655}
692 1656
1657#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD)
1658static int cedf_map_gpu_to_cpu(int gpu)
1659{
1660 int cpu_cluster = gpu / gpu_cluster_size;
1661 int default_cpu = cedf[cpu_cluster].cpus[0]->cpu; // first CPU in given cluster
1662
1663 TRACE("CPU %d is default for GPU %d interrupt threads.\n", default_cpu, gpu);
1664
1665 return default_cpu;
1666}
1667#endif
1668
693static long cedf_activate_plugin(void) 1669static long cedf_activate_plugin(void)
694{ 1670{
695 int i, j, cpu, ccpu, cpu_count; 1671 int i, j, cpu, ccpu, cpu_count;
@@ -736,18 +1712,33 @@ static long cedf_activate_plugin(void)
736 printk(KERN_INFO "C-EDF: %d cluster(s) of size = %d\n", 1712 printk(KERN_INFO "C-EDF: %d cluster(s) of size = %d\n",
737 num_clusters, cluster_size); 1713 num_clusters, cluster_size);
738 1714
1715
1716#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD)
1717 num_gpu_clusters = min(num_clusters, num_online_gpus());
1718 gpu_cluster_size = num_online_gpus() / num_gpu_clusters;
1719
1720 if (((num_online_gpus() % gpu_cluster_size) != 0) ||
1721 (num_gpu_clusters != num_clusters)) {
1722 printk(KERN_WARNING "C-EDF: GPUs not uniformly distributed among CPU clusters.\n");
1723 }
1724#endif
1725
739 /* initialize clusters */ 1726 /* initialize clusters */
740 cedf = kmalloc(num_clusters * sizeof(cedf_domain_t), GFP_ATOMIC); 1727 cedf = kmalloc(num_clusters * sizeof(cedf_domain_t), GFP_ATOMIC);
741 for (i = 0; i < num_clusters; i++) { 1728 for (i = 0; i < num_clusters; i++) {
742 1729
743 cedf[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t), 1730 cedf[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t),
744 GFP_ATOMIC); 1731 GFP_ATOMIC);
745 cedf[i].heap_node = kmalloc( 1732 INIT_BINHEAP_HANDLE(&(cedf[i].cpu_heap), cpu_lower_prio);
746 cluster_size * sizeof(struct bheap_node),
747 GFP_ATOMIC);
748 bheap_init(&(cedf[i].cpu_heap));
749 edf_domain_init(&(cedf[i].domain), NULL, cedf_release_jobs); 1733 edf_domain_init(&(cedf[i].domain), NULL, cedf_release_jobs);
750 1734
1735
1736#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
1737 cedf[i].pending_tasklets.head = NULL;
1738 cedf[i].pending_tasklets.tail = &(cedf[i].pending_tasklets.head);
1739#endif
1740
1741
751 if(!zalloc_cpumask_var(&cedf[i].cpu_map, GFP_ATOMIC)) 1742 if(!zalloc_cpumask_var(&cedf[i].cpu_map, GFP_ATOMIC))
752 return -ENOMEM; 1743 return -ENOMEM;
753#ifdef CONFIG_RELEASE_MASTER 1744#ifdef CONFIG_RELEASE_MASTER
@@ -758,6 +1749,10 @@ static long cedf_activate_plugin(void)
758 /* cycle through cluster and add cpus to them */ 1749 /* cycle through cluster and add cpus to them */
759 for (i = 0; i < num_clusters; i++) { 1750 for (i = 0; i < num_clusters; i++) {
760 1751
1752#ifdef CONFIG_LITMUS_DGL_SUPPORT
1753 raw_spin_lock_init(&cedf[i].dgl_lock);
1754#endif
1755
761 for_each_online_cpu(cpu) { 1756 for_each_online_cpu(cpu) {
762 /* check if the cpu is already in a cluster */ 1757 /* check if the cpu is already in a cluster */
763 for (j = 0; j < num_clusters; j++) 1758 for (j = 0; j < num_clusters; j++)
@@ -788,8 +1783,8 @@ static long cedf_activate_plugin(void)
788 atomic_set(&entry->will_schedule, 0); 1783 atomic_set(&entry->will_schedule, 0);
789 entry->cpu = ccpu; 1784 entry->cpu = ccpu;
790 entry->cluster = &cedf[i]; 1785 entry->cluster = &cedf[i];
791 entry->hn = &(cedf[i].heap_node[cpu_count]); 1786
792 bheap_node_init(&entry->hn, entry); 1787 INIT_BINHEAP_NODE(&entry->hn);
793 1788
794 cpu_count++; 1789 cpu_count++;
795 1790
@@ -806,6 +1801,14 @@ static long cedf_activate_plugin(void)
806 } 1801 }
807 } 1802 }
808 1803
1804#ifdef CONFIG_LITMUS_SOFTIRQD
1805 init_klmirqd();
1806#endif
1807
1808#ifdef CONFIG_LITMUS_NVIDIA
1809 init_nvidia_info();
1810#endif
1811
809 free_cpumask_var(mask); 1812 free_cpumask_var(mask);
810 clusters_allocated = 1; 1813 clusters_allocated = 1;
811 return 0; 1814 return 0;
@@ -824,6 +1827,33 @@ static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = {
824 .task_block = cedf_task_block, 1827 .task_block = cedf_task_block,
825 .admit_task = cedf_admit_task, 1828 .admit_task = cedf_admit_task,
826 .activate_plugin = cedf_activate_plugin, 1829 .activate_plugin = cedf_activate_plugin,
1830 .compare = edf_higher_prio,
1831#ifdef CONFIG_LITMUS_LOCKING
1832 .allocate_lock = cedf_allocate_lock,
1833 .increase_prio = increase_priority_inheritance,
1834 .decrease_prio = decrease_priority_inheritance,
1835 .__increase_prio = __increase_priority_inheritance,
1836 .__decrease_prio = __decrease_priority_inheritance,
1837#endif
1838#ifdef CONFIG_LITMUS_NESTED_LOCKING
1839 .nested_increase_prio = nested_increase_priority_inheritance,
1840 .nested_decrease_prio = nested_decrease_priority_inheritance,
1841 .__compare = __edf_higher_prio,
1842#endif
1843#ifdef CONFIG_LITMUS_DGL_SUPPORT
1844 .get_dgl_spinlock = cedf_get_dgl_spinlock,
1845#endif
1846#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1847 .allocate_aff_obs = cedf_allocate_affinity_observer,
1848#endif
1849#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
1850 .enqueue_pai_tasklet = cedf_enqueue_pai_tasklet,
1851 .change_prio_pai_tasklet = cedf_change_prio_pai_tasklet,
1852 .run_tasklets = cedf_run_tasklets,
1853#endif
1854#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD)
1855 .map_gpu_to_cpu = cedf_map_gpu_to_cpu,
1856#endif
827}; 1857};
828 1858
829static struct proc_dir_entry *cluster_file = NULL, *cedf_dir = NULL; 1859static struct proc_dir_entry *cluster_file = NULL, *cedf_dir = NULL;