path: root/litmus/sched_cedf.c
Diffstat (limited to 'litmus/sched_cedf.c')
-rw-r--r-- litmus/sched_cedf.c | 1102
1 file changed, 1094 insertions(+), 8 deletions(-)
diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c
index 49653f1ea49d..a55fc894340d 100644
--- a/litmus/sched_cedf.c
+++ b/litmus/sched_cedf.c
@@ -29,6 +29,7 @@
29 29 #include <linux/percpu.h>
30 30 #include <linux/sched.h>
31 31 #include <linux/slab.h>
32#include <linux/uaccess.h>
32 33
33 34 #include <linux/module.h>
34 35
@@ -50,7 +51,23 @@
50 51
51 52 /* to configure the cluster size */
52 53 #include <litmus/litmus_proc.h>
53 #include <linux/uaccess.h>
54
55#ifdef CONFIG_SCHED_CPU_AFFINITY
56#include <litmus/affinity.h>
57#endif
58
59#ifdef CONFIG_LITMUS_SOFTIRQD
60#include <litmus/litmus_softirq.h>
61#endif
62
63#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
64#include <linux/interrupt.h>
65#include <litmus/trace.h>
66#endif
67
68#ifdef CONFIG_LITMUS_NVIDIA
69#include <litmus/nvidia_info.h>
70#endif
54 71
55 72 /* Reference configuration variable. Determines which cache level is used to
56 73 * group CPUs into clusters. GLOBAL_CLUSTER, which is the default, means that
@@ -86,6 +103,15 @@ DEFINE_PER_CPU(cpu_entry_t, cedf_cpu_entries);
86 103 #define test_will_schedule(cpu) \
87 104 (atomic_read(&per_cpu(cedf_cpu_entries, cpu).will_schedule))
88 105
106
107#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
108struct tasklet_head
109{
110 struct tasklet_struct *head;
111 struct tasklet_struct **tail;
112};
113#endif
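
The head/tail pair above is the usual singly linked queue idiom: tail always points at the next field of the last element (or at head itself while the queue is empty), so appends never need a special case. A minimal self-contained sketch of the idiom, independent of the tasklet types used in this patch:

    struct node { struct node *next; };
    struct queue { struct node *head; struct node **tail; };

    static void queue_init(struct queue *q)
    {
            q->head = NULL;
            q->tail = &q->head;   /* empty queue: tail points at head itself */
    }

    static void queue_append(struct queue *q, struct node *n)
    {
            n->next = NULL;
            *q->tail = n;         /* link n after the current last element */
            q->tail = &n->next;   /* the new element is now the last link */
    }

cedf_activate_plugin() initializes pending_tasklets exactly this way (tail = &head), and the tail-insertion branch of __add_pai_tasklet() further down follows the same append pattern.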
114
89 115 /*
90 116 * In C-EDF there is a cedf domain _per_ cluster
91 117 * The number of clusters is dynamically determined accordingly to the
@@ -102,6 +128,10 @@ typedef struct clusterdomain {
102 128 struct binheap_handle cpu_heap;
103 129 /* lock for this cluster */
104 130 #define cluster_lock domain.ready_lock
131
132#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
133 struct tasklet_head pending_tasklets;
134#endif
105 135 } cedf_domain_t;
106 136
107 137 /* a cedf_domain per cluster; allocation is done at init/activation time */
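
For orientation: the helpers task_cpu_cluster() and remote_cluster() used throughout the new code below resolve a task or a CPU to its cedf_domain_t. Their definitions live in the unchanged part of sched_cedf.c; a rough sketch, assuming each cpu_entry_t carries a back-pointer to the cluster it was assigned to during plugin activation:

    /* sketch only -- assumes cpu_entry_t has a 'cluster' back-pointer */
    #define remote_cluster(cpu) \
            ((cedf_domain_t *) per_cpu(cedf_cpu_entries, cpu).cluster)

    /* a real-time task belongs to the cluster of its assigned partition */
    #define task_cpu_cluster(task)  remote_cluster(get_partition(task))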
@@ -206,7 +236,7 @@ static noinline void link_task_to_cpu(struct task_struct* linked,
206 236 }
207 237
208 238 /* unlink - Make sure a task is not linked any longer to an entry
209 * where it was linked before. Must hold cedf_lock.
239 * where it was linked before. Must hold cluster_lock.
210 240 */
211 241 static noinline void unlink(struct task_struct* t)
212 242 {
@@ -242,7 +272,7 @@ static void preempt(cpu_entry_t *entry)
242 272 }
243 273
244 274 /* requeue - Put an unlinked task into gsn-edf domain.
245 * Caller must hold cedf_lock.
275 * Caller must hold cluster_lock.
246 276 */
247 277 static noinline void requeue(struct task_struct* task)
248 278 {
@@ -337,13 +367,17 @@ static void cedf_release_jobs(rt_domain_t* rt, struct bheap* tasks)
337 367 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
338 368 }
339 369
340 /* caller holds cedf_lock */
370 /* caller holds cluster_lock */
341 371 static noinline void job_completion(struct task_struct *t, int forced)
342 372 {
343 373 BUG_ON(!t);
344 374
345 375 sched_trace_task_completion(t, forced);
346 376
377#ifdef CONFIG_LITMUS_NVIDIA
378 atomic_set(&tsk_rt(t)->nv_int_count, 0);
379#endif
380
347 381 TRACE_TASK(t, "job_completion().\n");
348 382
349 383 /* set flags */
@@ -387,6 +421,288 @@ static void cedf_tick(struct task_struct* t)
387 421 }
388 422 }
389 423
424
425
426
427
428
429
430
431
432
433
434
435
436#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
437
438
439static void __do_lit_tasklet(struct tasklet_struct* tasklet, unsigned long flushed)
440{
441 if (!atomic_read(&tasklet->count)) {
442 if(tasklet->owner) {
443 sched_trace_tasklet_begin(tasklet->owner);
444 }
445
446 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state))
447 {
448 BUG();
449 }
450 TRACE("%s: Invoking tasklet with owner pid = %d (flushed = %d).\n",
451 __FUNCTION__,
452 (tasklet->owner) ? tasklet->owner->pid : -1,
453 (tasklet->owner) ? 0 : 1);
454 tasklet->func(tasklet->data);
455 tasklet_unlock(tasklet);
456
457 if(tasklet->owner) {
458 sched_trace_tasklet_end(tasklet->owner, flushed);
459 }
460 }
461 else {
462 BUG();
463 }
464}
465
466
467static void flush_tasklets(cedf_domain_t* cluster, struct task_struct* task)
468{
469 // Lazy flushing: rather than removing the tasklets from the queue here,
470 // just change ownership to NULL and let an idle processor
471 // take care of them later.
472
473 struct tasklet_struct* step;
474 unsigned long flags;
475
476 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
477
478 for(step = cluster->pending_tasklets.head; step != NULL; step = step->next) {
479 if(step->owner == task) {
480 TRACE("%s: Found tasklet to flush: %d\n", __FUNCTION__, step->owner->pid);
481 step->owner = NULL;
482 }
483 }
484
485 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
486}
487
488
489static void do_lit_tasklets(cedf_domain_t* cluster, struct task_struct* sched_task)
490{
491 int work_to_do = 1;
492 struct tasklet_struct *tasklet = NULL;
493 unsigned long flags;
494
495 while(work_to_do) {
496
497 TS_NV_SCHED_BOTISR_START;
498
499 // remove tasklet at head of list if it has higher priority.
500 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
501
502 if(cluster->pending_tasklets.head != NULL) {
503 // remove tasklet at head.
504 tasklet = cluster->pending_tasklets.head;
505
506 if(edf_higher_prio(tasklet->owner, sched_task)) {
507
508 if(NULL == tasklet->next) {
509 // tasklet is at the head, list only has one element
510 TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, (tasklet->owner) ? tasklet->owner->pid : -1);
511 cluster->pending_tasklets.tail = &(cluster->pending_tasklets.head);
512 }
513
514 // remove the tasklet from the queue
515 cluster->pending_tasklets.head = tasklet->next;
516
517 TRACE("%s: Removed tasklet for %d from tasklet queue.\n", __FUNCTION__, (tasklet->owner) ? tasklet->owner->pid : -1);
518 }
519 else {
520 TRACE("%s: Pending tasklet (%d) does not have priority to run on this CPU (%d).\n", __FUNCTION__, (tasklet->owner) ? tasklet->owner->pid : -1, smp_processor_id());
521 tasklet = NULL;
522 }
523 }
524 else {
525 TRACE("%s: Tasklet queue is empty.\n", __FUNCTION__);
526 }
527
528 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
529
530 TS_NV_SCHED_BOTISR_END;
531
532 if(tasklet) {
533 __do_lit_tasklet(tasklet, 0ul);
534 tasklet = NULL;
535 }
536 else {
537 work_to_do = 0;
538 }
539 }
540}
541
542
543static void run_tasklets(struct task_struct* sched_task)
544{
545 cedf_domain_t* cluster;
546
547 preempt_disable();
548
549 cluster = (is_realtime(sched_task)) ?
550 task_cpu_cluster(sched_task) :
551 remote_cluster(smp_processor_id());
552
553 if(cluster && cluster->pending_tasklets.head != NULL) {
554 TRACE("%s: There are tasklets to process.\n", __FUNCTION__);
555 do_lit_tasklets(cluster, sched_task);
556 }
557
558 preempt_enable_no_resched();
559}
560
561
562static void __add_pai_tasklet(struct tasklet_struct* tasklet, cedf_domain_t* cluster)
563{
564 struct tasklet_struct* step;
565
566 tasklet->next = NULL; // make sure there are no old values floating around
567
568 step = cluster->pending_tasklets.head;
569 if(step == NULL) {
570 TRACE("%s: tasklet queue empty. inserting tasklet for %d at head.\n", __FUNCTION__, tasklet->owner->pid);
571 // insert at tail.
572 *(cluster->pending_tasklets.tail) = tasklet;
573 cluster->pending_tasklets.tail = &(tasklet->next);
574 }
575 else if((*(cluster->pending_tasklets.tail) != NULL) &&
576 edf_higher_prio((*(cluster->pending_tasklets.tail))->owner, tasklet->owner)) {
577 // insert at tail.
578 TRACE("%s: tasklet belongs at end. inserting tasklet for %d at tail.\n", __FUNCTION__, tasklet->owner->pid);
579
580 *(cluster->pending_tasklets.tail) = tasklet;
581 cluster->pending_tasklets.tail = &(tasklet->next);
582 }
583 else {
584
585 // insert the tasklet somewhere in the middle.
586
587 TRACE("%s: tasklet belongs somewhere in the middle.\n", __FUNCTION__);
588
589 while(step->next && edf_higher_prio(step->next->owner, tasklet->owner)) {
590 step = step->next;
591 }
592
593 // insert tasklet right before step->next.
594
595 TRACE("%s: inserting tasklet for %d between %d and %d.\n", __FUNCTION__,
596 tasklet->owner->pid,
597 (step->owner) ?
598 step->owner->pid :
599 -1,
600 (step->next) ?
601 ((step->next->owner) ?
602 step->next->owner->pid :
603 -1) :
604 -1);
605
606 tasklet->next = step->next;
607 step->next = tasklet;
608
609 // patch up the head if needed.
610 if(cluster->pending_tasklets.head == step)
611 {
612 TRACE("%s: %d is the new tasklet queue head.\n", __FUNCTION__, tasklet->owner->pid);
613 cluster->pending_tasklets.head = tasklet;
614 }
615 }
616}
617
618static int enqueue_pai_tasklet(struct tasklet_struct* tasklet)
619{
620 cedf_domain_t *cluster = NULL;
621 cpu_entry_t *targetCPU = NULL;
622 int thisCPU;
623 int runLocal = 0;
624 int runNow = 0;
625 unsigned long flags;
626
627 if(unlikely((tasklet->owner == NULL) || !is_realtime(tasklet->owner)))
628 {
629 TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
630 return 0;
631 }
632
633 cluster = task_cpu_cluster(tasklet->owner);
634
635 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
636
637 thisCPU = smp_processor_id();
638
639#ifdef CONFIG_SCHED_CPU_AFFINITY
640 {
641 cpu_entry_t* affinity = NULL;
642
643 // use this CPU if it is in our cluster and isn't running any RT work.
644 if(cpu_isset(thisCPU, *cluster->cpu_map) && (__get_cpu_var(cedf_cpu_entries).linked == NULL)) {
645 affinity = &(__get_cpu_var(cedf_cpu_entries));
646 }
647 else {
648 // this CPU is busy or shouldn't run tasklets for this cluster.
649 // look for an available nearby CPU.
650 // NOTE: Affinity towards owner and not this CPU. Is this right?
651 affinity =
652 cedf_get_nearest_available_cpu(cluster,
653 &per_cpu(cedf_cpu_entries, task_cpu(tasklet->owner)));
654 }
655
656 targetCPU = affinity;
657 }
658#endif
659
660 if (targetCPU == NULL) {
661 targetCPU = lowest_prio_cpu(cluster);
662 }
663
664 if (edf_higher_prio(tasklet->owner, targetCPU->linked)) {
665 if (thisCPU == targetCPU->cpu) {
666 TRACE("%s: Run tasklet locally (and now).\n", __FUNCTION__);
667 runLocal = 1;
668 runNow = 1;
669 }
670 else {
671 TRACE("%s: Run tasklet remotely (and now).\n", __FUNCTION__);
672 runLocal = 0;
673 runNow = 1;
674 }
675 }
676 else {
677 runLocal = 0;
678 runNow = 0;
679 }
680
681 if(!runLocal) {
682 // enqueue the tasklet
683 __add_pai_tasklet(tasklet, cluster);
684 }
685
686 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
687
688
689 if (runLocal /*&& runNow */) { // runNow == 1 is implied
690 TRACE("%s: Running tasklet on CPU where it was received.\n", __FUNCTION__);
691 __do_lit_tasklet(tasklet, 0ul);
692 }
693 else if (runNow /*&& !runLocal */) { // runLocal == 0 is implied
694 TRACE("%s: Triggering CPU %d to run tasklet.\n", __FUNCTION__, targetCPU->cpu);
695 preempt(targetCPU); // need to be protected by cluster_lock?
696 }
697 else {
698 TRACE("%s: Scheduling of tasklet was deferred.\n", __FUNCTION__);
699 }
700
701 return(1); // success
702}
703
704#endif // PAI
705
390 706 /* Getting schedule() right is a bit tricky. schedule() may not make any
391 707 * assumptions on the state of the current task since it may be called for a
392 708 * number of reasons. The reasons include a scheduler_tick() determined that it
@@ -512,7 +828,7 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
512 828 raw_spin_unlock(&cluster->cluster_lock);
513 829
514 830 #ifdef WANT_ALL_SCHED_EVENTS
515 TRACE("cedf_lock released, next=0x%p\n", next);
831 TRACE("cluster_lock released, next=0x%p\n", next);
516 832
517 833 if (next)
518 834 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
@@ -520,7 +836,6 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
520 836 TRACE("becomes idle at %llu.\n", litmus_clock());
521 837 #endif
522 838
523
524 839 return next;
525 840 }
526 841
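
The comment above cedf_schedule() (old lines 390 ff.) states the key invariant for this hunk: the plugin re-derives the state of prev on every call instead of assuming why schedule() was invoked. As a hedged sketch of the usual LITMUS^RT pattern for that re-derivation (the flag names are illustrative and not taken from this patch):

    /* illustrative only: how cedf_schedule() typically classifies prev
     * while holding cluster_lock, before linking/unlinking tasks */
    exists      = entry->scheduled != NULL;
    blocks      = exists && !is_running(entry->scheduled);
    out_of_time = exists && budget_enforced(entry->scheduled)
                         && budget_exhausted(entry->scheduled);
    np          = exists && is_np(entry->scheduled);
    sleep       = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP;
    preempt     = entry->scheduled != entry->linked;

Each combination of these flags then maps to an unlink/requeue/link decision made before cluster_lock is released above.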
@@ -584,7 +899,7 @@ static void cedf_task_new(struct task_struct * t, int on_rq, int running)
584 899 static void cedf_task_wake_up(struct task_struct *task)
585 900 {
586 901 unsigned long flags;
587 lt_t now;
902 //lt_t now;
588 903 cedf_domain_t *cluster;
589 904
590 905 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
@@ -592,6 +907,8 @@ static void cedf_task_wake_up(struct task_struct *task)
592 907 cluster = task_cpu_cluster(task);
593 908
594 909 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
910
911#if 0 // sporadic task model
595 /* We need to take suspensions because of semaphores into 912 /* We need to take suspensions because of semaphores into
596 * account! If a job resumes after being suspended due to acquiring 913 * account! If a job resumes after being suspended due to acquiring
597 * a semaphore, it should never be treated as a new job release. 914 * a semaphore, it should never be treated as a new job release.
@@ -613,7 +930,13 @@ static void cedf_task_wake_up(struct task_struct *task)
613 930 }
614 931 }
615 932 }
616 cedf_job_arrival(task);
933 #endif
934
935 set_rt_flags(task, RT_F_RUNNING); // periodic model
936
937 if(tsk_rt(task)->linked_on == NO_CPU)
938 cedf_job_arrival(task);
939
617 940 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
618 941 }
619 942
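
Note that the rewritten wake-up path above now treats every resumption under the periodic model (RT_F_RUNNING) and only calls cedf_job_arrival() for tasks that are not already linked. For contrast, a hedged sketch of what the sporadic-model branch disabled under #if 0 does in the stock plugin (helper names as used elsewhere in LITMUS^RT; reproduced here for illustration only, not as the code of this patch):

    /* sketch of the disabled sporadic-model handling */
    lt_t now = litmus_clock();
    if (is_tardy(task, now)) {
            /* woke up past its deadline: treat as a new sporadic release */
            release_at(task, now);
            sched_trace_task_release(task);
    } else if (task->rt.time_slice) {
            /* resumed before its deadline (e.g. after a semaphore wait):
             * keep the current job, just mark it runnable again */
            set_rt_flags(task, RT_F_RUNNING);
    }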
@@ -640,6 +963,10 @@ static void cedf_task_exit(struct task_struct * t)
640 963 unsigned long flags;
641 964 cedf_domain_t *cluster = task_cpu_cluster(t);
642 965
966#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
967 flush_tasklets(cluster, t);
968#endif
969
643 970 /* unlink if necessary */
644 971 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
645 972 unlink(t);
@@ -660,6 +987,711 @@ static long cedf_admit_task(struct task_struct* tsk)
660 987 return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL;
661 988 }
662 989
990
991
992#ifdef CONFIG_LITMUS_LOCKING
993
994#include <litmus/fdso.h>
995
996
997static void __set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
998{
999 int linked_on;
1000 int check_preempt = 0;
1001
1002 cedf_domain_t* cluster = task_cpu_cluster(t);
1003
1004 if(prio_inh != NULL)
1005 TRACE_TASK(t, "inherits priority from %s/%d\n", prio_inh->comm, prio_inh->pid);
1006 else
1007 TRACE_TASK(t, "inherits priority from %p\n", prio_inh);
1008
1009 sched_trace_eff_prio_change(t, prio_inh);
1010
1011 tsk_rt(t)->inh_task = prio_inh;
1012
1013 linked_on = tsk_rt(t)->linked_on;
1014
1015 /* If it is scheduled, then we need to reorder the CPU heap. */
1016 if (linked_on != NO_CPU) {
1017 TRACE_TASK(t, "%s: linked on %d\n",
1018 __FUNCTION__, linked_on);
1019 /* Holder is scheduled; need to re-order CPUs.
1020 * We can't use heap_decrease() here since
1021 * the cpu_heap is ordered in reverse direction, so
1022 * it is actually an increase. */
1023 bheap_delete(cpu_lower_prio, &cluster->cpu_heap,
1024 per_cpu(cedf_cpu_entries, linked_on).hn);
1025 bheap_insert(cpu_lower_prio, &cluster->cpu_heap,
1026 per_cpu(cedf_cpu_entries, linked_on).hn);
1027 } else {
1028 /* holder may be queued: first stop queue changes */
1029 raw_spin_lock(&cluster->domain.release_lock);
1030 if (is_queued(t)) {
1031 TRACE_TASK(t, "%s: is queued\n", __FUNCTION__);
1032
1033 /* We need to update the position of holder in some
1034 * heap. Note that this could be a release heap if
1035 * budget enforcement is used and this job overran. */
1036 check_preempt = !bheap_decrease(edf_ready_order, tsk_rt(t)->heap_node);
1037
1038 } else {
1039 /* Nothing to do: if it is not queued and not linked
1040 * then it is either sleeping or currently being moved
1041 * by other code (e.g., a timer interrupt handler) that
1042 * will use the correct priority when enqueuing the
1043 * task. */
1044 TRACE_TASK(t, "%s: is NOT queued => Done.\n", __FUNCTION__);
1045 }
1046 raw_spin_unlock(&cluster->domain.release_lock);
1047
1048 /* If holder was enqueued in a release heap, then the following
1049 * preemption check is pointless, but we can't easily detect
1050 * that case. If you want to fix this, then consider that
1051 * simply adding a state flag requires O(n) time to update when
1052 * releasing n tasks, which conflicts with the goal to have
1053 * O(log n) merges. */
1054 if (check_preempt) {
1055 /* heap_decrease() hit the top level of the heap: make
1056 * sure preemption checks get the right task, not the
1057 * potentially stale cache. */
1058 bheap_uncache_min(edf_ready_order, &cluster->domain.ready_queue);
1059 check_for_preemptions(cluster);
1060 }
1061 }
1062}
1063
1064/* called with IRQs off */
1065static void set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
1066{
1067 cedf_domain_t* cluster = task_cpu_cluster(t);
1068
1069 raw_spin_lock(&cluster->cluster_lock);
1070
1071 __set_priority_inheritance(t, prio_inh);
1072
1073#ifdef CONFIG_LITMUS_SOFTIRQD
1074 if(tsk_rt(t)->cur_klitirqd != NULL)
1075 {
1076 TRACE_TASK(t, "%s/%d inherits a new priority!\n",
1077 tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid);
1078
1079 __set_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh);
1080 }
1081#endif
1082
1083 raw_spin_unlock(&cluster->cluster_lock);
1084}
1085
1086
1087/* called with IRQs off */
1088static void __clear_priority_inheritance(struct task_struct* t)
1089{
1090 TRACE_TASK(t, "priority restored\n");
1091
1092 if(tsk_rt(t)->scheduled_on != NO_CPU)
1093 {
1094 sched_trace_eff_prio_change(t, NULL);
1095
1096 tsk_rt(t)->inh_task = NULL;
1097
1098 /* Check if rescheduling is necessary. We can't use heap_decrease()
1099 * since the priority was effectively lowered. */
1100 unlink(t);
1101 cedf_job_arrival(t);
1102 }
1103 else
1104 {
1105 __set_priority_inheritance(t, NULL);
1106 }
1107
1108#ifdef CONFIG_LITMUS_SOFTIRQD
1109 if(tsk_rt(t)->cur_klitirqd != NULL)
1110 {
1111 TRACE_TASK(t, "%s/%d inheritance set back to owner.\n",
1112 tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid);
1113
1114 if(tsk_rt(tsk_rt(t)->cur_klitirqd)->scheduled_on != NO_CPU)
1115 {
1116 sched_trace_eff_prio_change(tsk_rt(t)->cur_klitirqd, t);
1117
1118 tsk_rt(tsk_rt(t)->cur_klitirqd)->inh_task = t;
1119
1120 /* Check if rescheduling is necessary. We can't use heap_decrease()
1121 * since the priority was effectively lowered. */
1122 unlink(tsk_rt(t)->cur_klitirqd);
1123 cedf_job_arrival(tsk_rt(t)->cur_klitirqd);
1124 }
1125 else
1126 {
1127 __set_priority_inheritance(tsk_rt(t)->cur_klitirqd, t);
1128 }
1129 }
1130#endif
1131}
1132
1133/* called with IRQs off */
1134static void clear_priority_inheritance(struct task_struct* t)
1135{
1136 cedf_domain_t* cluster = task_cpu_cluster(t);
1137
1138 raw_spin_lock(&cluster->cluster_lock);
1139 __clear_priority_inheritance(t);
1140 raw_spin_unlock(&cluster->cluster_lock);
1141}
1142
1143
1144
1145#ifdef CONFIG_LITMUS_SOFTIRQD
1146/* called with IRQs off */
1147static void set_priority_inheritance_klitirqd(struct task_struct* klitirqd,
1148 struct task_struct* old_owner,
1149 struct task_struct* new_owner)
1150{
1151 cedf_domain_t* cluster = task_cpu_cluster(klitirqd);
1152
1153 BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread));
1154
1155 raw_spin_lock(&cluster->cluster_lock);
1156
1157 if(old_owner != new_owner)
1158 {
1159 if(old_owner)
1160 {
1161 // unreachable?
1162 tsk_rt(old_owner)->cur_klitirqd = NULL;
1163 }
1164
1165 TRACE_TASK(klitirqd, "giving ownership to %s/%d.\n",
1166 new_owner->comm, new_owner->pid);
1167
1168 tsk_rt(new_owner)->cur_klitirqd = klitirqd;
1169 }
1170
1171 __set_priority_inheritance(klitirqd,
1172 (tsk_rt(new_owner)->inh_task == NULL) ?
1173 new_owner :
1174 tsk_rt(new_owner)->inh_task);
1175
1176 raw_spin_unlock(&cluster->cluster_lock);
1177}
1178
1179/* called with IRQs off */
1180static void clear_priority_inheritance_klitirqd(struct task_struct* klitirqd,
1181 struct task_struct* old_owner)
1182{
1183 cedf_domain_t* cluster = task_cpu_cluster(klitirqd);
1184
1185 BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread));
1186
1187 raw_spin_lock(&cluster->cluster_lock);
1188
1189 TRACE_TASK(klitirqd, "priority restored\n");
1190
1191 if(tsk_rt(klitirqd)->scheduled_on != NO_CPU)
1192 {
1193 tsk_rt(klitirqd)->inh_task = NULL;
1194
1195 /* Check if rescheduling is necessary. We can't use heap_decrease()
1196 * since the priority was effectively lowered. */
1197 unlink(klitirqd);
1198 cedf_job_arrival(klitirqd);
1199 }
1200 else
1201 {
1202 __set_priority_inheritance(klitirqd, NULL);
1203 }
1204
1205 tsk_rt(old_owner)->cur_klitirqd = NULL;
1206
1207 raw_spin_unlock(&cluster->cluster_lock);
1208}
1209#endif // CONFIG_LITMUS_SOFTIRQD
1210
1211
1212/* ******************** KFMLP support ********************** */
1213
1214/* struct for semaphore with priority inheritance */
1215struct kfmlp_queue
1216{
1217 wait_queue_head_t wait;
1218 struct task_struct* owner;
1219 struct task_struct* hp_waiter;
1220 int count; /* number of waiters + holder */
1221};
1222
1223struct kfmlp_semaphore
1224{
1225 struct litmus_lock litmus_lock;
1226
1227 spinlock_t lock;
1228
1229 int num_resources; /* aka k */
1230 struct kfmlp_queue *queues; /* array */
1231 struct kfmlp_queue *shortest_queue; /* pointer to shortest queue */
1232};
1233
1234static inline struct kfmlp_semaphore* kfmlp_from_lock(struct litmus_lock* lock)
1235{
1236 return container_of(lock, struct kfmlp_semaphore, litmus_lock);
1237}
1238
1239static inline int kfmlp_get_idx(struct kfmlp_semaphore* sem,
1240 struct kfmlp_queue* queue)
1241{
1242 return (queue - &sem->queues[0]);
1243}
1244
1245static inline struct kfmlp_queue* kfmlp_get_queue(struct kfmlp_semaphore* sem,
1246 struct task_struct* holder)
1247{
1248 int i;
1249 for(i = 0; i < sem->num_resources; ++i)
1250 if(sem->queues[i].owner == holder)
1251 return(&sem->queues[i]);
1252 return(NULL);
1253}
1254
1255/* caller is responsible for locking */
1256static struct task_struct* kfmlp_find_hp_waiter(struct kfmlp_queue *kqueue,
1257 struct task_struct *skip)
1258{
1259 struct list_head *pos;
1260 struct task_struct *queued, *found = NULL;
1261
1262 list_for_each(pos, &kqueue->wait.task_list) {
1263 queued = (struct task_struct*) list_entry(pos, wait_queue_t,
1264 task_list)->private;
1265
1266 /* Compare task prios, find high prio task. */
1267 if (queued != skip && edf_higher_prio(queued, found))
1268 found = queued;
1269 }
1270 return found;
1271}
1272
1273static inline struct kfmlp_queue* kfmlp_find_shortest(
1274 struct kfmlp_semaphore* sem,
1275 struct kfmlp_queue* search_start)
1276{
1277 // we start our search at search_start instead of at the beginning of the
1278 // queue list to load-balance across all resources.
1279 struct kfmlp_queue* step = search_start;
1280 struct kfmlp_queue* shortest = sem->shortest_queue;
1281
1282 do
1283 {
1284 step = (step+1 != &sem->queues[sem->num_resources]) ?
1285 step+1 : &sem->queues[0];
1286 if(step->count < shortest->count)
1287 {
1288 shortest = step;
1289 if(step->count == 0)
1290 break; /* can't get any shorter */
1291 }
1292 }while(step != search_start);
1293
1294 return(shortest);
1295}
1296
1297static struct task_struct* kfmlp_remove_hp_waiter(struct kfmlp_semaphore* sem)
1298{
1299 /* must hold sem->lock */
1300
1301 struct kfmlp_queue *my_queue = NULL;
1302 struct task_struct *max_hp = NULL;
1303
1304
1305 struct list_head *pos;
1306 struct task_struct *queued;
1307 int i;
1308
1309 for(i = 0; i < sem->num_resources; ++i)
1310 {
1311 if( (sem->queues[i].count > 1) &&
1312 ((my_queue == NULL) ||
1313 (edf_higher_prio(sem->queues[i].hp_waiter, my_queue->hp_waiter))) )
1314 {
1315 my_queue = &sem->queues[i];
1316 }
1317 }
1318
1319 if(my_queue)
1320 {
1321 cedf_domain_t* cluster;
1322
1323 max_hp = my_queue->hp_waiter;
1324 BUG_ON(!max_hp);
1325
1326 TRACE_CUR("queue %d: stealing %s/%d from queue %d\n",
1327 kfmlp_get_idx(sem, my_queue),
1328 max_hp->comm, max_hp->pid,
1329 kfmlp_get_idx(sem, my_queue));
1330
1331 my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, max_hp);
1332
1333 /*
1334 if(my_queue->hp_waiter)
1335 TRACE_CUR("queue %d: new hp_waiter is %s/%d\n",
1336 kfmlp_get_idx(sem, my_queue),
1337 my_queue->hp_waiter->comm,
1338 my_queue->hp_waiter->pid);
1339 else
1340 TRACE_CUR("queue %d: new hp_waiter is %p\n",
1341 kfmlp_get_idx(sem, my_queue), NULL);
1342 */
1343
1344 cluster = task_cpu_cluster(max_hp);
1345
1346 raw_spin_lock(&cluster->cluster_lock);
1347
1348 /*
1349 if(my_queue->owner)
1350 TRACE_CUR("queue %d: owner is %s/%d\n",
1351 kfmlp_get_idx(sem, my_queue),
1352 my_queue->owner->comm,
1353 my_queue->owner->pid);
1354 else
1355 TRACE_CUR("queue %d: owner is %p\n",
1356 kfmlp_get_idx(sem, my_queue),
1357 NULL);
1358 */
1359
1360 if(tsk_rt(my_queue->owner)->inh_task == max_hp)
1361 {
1362 __clear_priority_inheritance(my_queue->owner);
1363 if(my_queue->hp_waiter != NULL)
1364 {
1365 __set_priority_inheritance(my_queue->owner, my_queue->hp_waiter);
1366 }
1367 }
1368 raw_spin_unlock(&cluster->cluster_lock);
1369
1370 list_for_each(pos, &my_queue->wait.task_list)
1371 {
1372 queued = (struct task_struct*) list_entry(pos, wait_queue_t,
1373 task_list)->private;
1374 /* Compare task prios, find high prio task. */
1375 if (queued == max_hp)
1376 {
1377 /*
1378 TRACE_CUR("queue %d: found entry in wait queue. REMOVING!\n",
1379 kfmlp_get_idx(sem, my_queue));
1380 */
1381 __remove_wait_queue(&my_queue->wait,
1382 list_entry(pos, wait_queue_t, task_list));
1383 break;
1384 }
1385 }
1386 --(my_queue->count);
1387 }
1388
1389 return(max_hp);
1390}
1391
1392int cedf_kfmlp_lock(struct litmus_lock* l)
1393{
1394 struct task_struct* t = current;
1395 struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
1396 struct kfmlp_queue* my_queue;
1397 wait_queue_t wait;
1398 unsigned long flags;
1399
1400 if (!is_realtime(t))
1401 return -EPERM;
1402
1403 spin_lock_irqsave(&sem->lock, flags);
1404
1405 my_queue = sem->shortest_queue;
1406
1407 if (my_queue->owner) {
1408 /* resource is not free => must suspend and wait */
1409 TRACE_CUR("queue %d: Resource is not free => must suspend and wait.\n",
1410 kfmlp_get_idx(sem, my_queue));
1411
1412 init_waitqueue_entry(&wait, t);
1413
1414 /* FIXME: interruptible would be nice some day */
1415 set_task_state(t, TASK_UNINTERRUPTIBLE);
1416
1417 __add_wait_queue_tail_exclusive(&my_queue->wait, &wait);
1418
1419 /* check if we need to activate priority inheritance */
1420 if (edf_higher_prio(t, my_queue->hp_waiter))
1421 {
1422 my_queue->hp_waiter = t;
1423 if (edf_higher_prio(t, my_queue->owner))
1424 {
1425 set_priority_inheritance(my_queue->owner, my_queue->hp_waiter);
1426 }
1427 }
1428
1429 ++(my_queue->count);
1430 sem->shortest_queue = kfmlp_find_shortest(sem, my_queue);
1431
1432 /* release lock before sleeping */
1433 spin_unlock_irqrestore(&sem->lock, flags);
1434
1435 /* We depend on the FIFO order. Thus, we don't need to recheck
1436 * when we wake up; we are guaranteed to have the lock since
1437 * there is only one wake up per release (or steal).
1438 */
1439 schedule();
1440
1441
1442 if(my_queue->owner == t)
1443 {
1444 TRACE_CUR("queue %d: acquired through waiting\n",
1445 kfmlp_get_idx(sem, my_queue));
1446 }
1447 else
1448 {
1449 /* this case may happen if our wait entry was stolen
1450 between queues. record where we went.*/
1451 my_queue = kfmlp_get_queue(sem, t);
1452 BUG_ON(!my_queue);
1453 TRACE_CUR("queue %d: acquired through stealing\n",
1454 kfmlp_get_idx(sem, my_queue));
1455 }
1456 }
1457 else
1458 {
1459 TRACE_CUR("queue %d: acquired immediately\n",
1460 kfmlp_get_idx(sem, my_queue));
1461
1462 my_queue->owner = t;
1463
1464 ++(my_queue->count);
1465 sem->shortest_queue = kfmlp_find_shortest(sem, my_queue);
1466
1467 spin_unlock_irqrestore(&sem->lock, flags);
1468 }
1469
1470 return kfmlp_get_idx(sem, my_queue);
1471}
1472
1473int cedf_kfmlp_unlock(struct litmus_lock* l)
1474{
1475 struct task_struct *t = current, *next;
1476 struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
1477 struct kfmlp_queue *my_queue;
1478 unsigned long flags;
1479 int err = 0;
1480
1481 spin_lock_irqsave(&sem->lock, flags);
1482
1483 my_queue = kfmlp_get_queue(sem, t);
1484
1485 if (!my_queue) {
1486 err = -EINVAL;
1487 goto out;
1488 }
1489
1490 /* check if there are jobs waiting for this resource */
1491 next = __waitqueue_remove_first(&my_queue->wait);
1492 if (next) {
1493 /*
1494 TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - next\n",
1495 kfmlp_get_idx(sem, my_queue),
1496 next->comm, next->pid);
1497 */
1498 /* next becomes the resource holder */
1499 my_queue->owner = next;
1500
1501 --(my_queue->count);
1502 if(my_queue->count < sem->shortest_queue->count)
1503 {
1504 sem->shortest_queue = my_queue;
1505 }
1506
1507 TRACE_CUR("queue %d: lock ownership passed to %s/%d\n",
1508 kfmlp_get_idx(sem, my_queue), next->comm, next->pid);
1509
1510 /* determine new hp_waiter if necessary */
1511 if (next == my_queue->hp_waiter) {
1512 TRACE_TASK(next, "was highest-prio waiter\n");
1513 /* next has the highest priority --- it doesn't need to
1514 * inherit. However, we need to make sure that the
1515 * next-highest priority in the queue is reflected in
1516 * hp_waiter. */
1517 my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, next);
1518 if (my_queue->hp_waiter)
1519 TRACE_TASK(my_queue->hp_waiter, "queue %d: is new highest-prio waiter\n", kfmlp_get_idx(sem, my_queue));
1520 else
1521 TRACE("queue %d: no further waiters\n", kfmlp_get_idx(sem, my_queue));
1522 } else {
1523 /* Well, if next is not the highest-priority waiter,
1524 * then it ought to inherit the highest-priority
1525 * waiter's priority. */
1526 set_priority_inheritance(next, my_queue->hp_waiter);
1527 }
1528
1529 /* wake up next */
1530 wake_up_process(next);
1531 }
1532 else
1533 {
1534 TRACE_CUR("queue %d: looking to steal someone...\n", kfmlp_get_idx(sem, my_queue));
1535
1536 next = kfmlp_remove_hp_waiter(sem); /* returns NULL if nothing to steal */
1537
1538 /*
1539 if(next)
1540 TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - steal\n",
1541 kfmlp_get_idx(sem, my_queue),
1542 next->comm, next->pid);
1543 */
1544
1545 my_queue->owner = next;
1546
1547 if(next)
1548 {
1549 TRACE_CUR("queue %d: lock ownership passed to %s/%d (which was stolen)\n",
1550 kfmlp_get_idx(sem, my_queue),
1551 next->comm, next->pid);
1552
1553 /* wake up next */
1554 wake_up_process(next);
1555 }
1556 else
1557 {
1558 TRACE_CUR("queue %d: no one to steal.\n", kfmlp_get_idx(sem, my_queue));
1559
1560 --(my_queue->count);
1561 if(my_queue->count < sem->shortest_queue->count)
1562 {
1563 sem->shortest_queue = my_queue;
1564 }
1565 }
1566 }
1567
1568 /* we lose the benefit of priority inheritance (if any) */
1569 if (tsk_rt(t)->inh_task)
1570 clear_priority_inheritance(t);
1571
1572out:
1573 spin_unlock_irqrestore(&sem->lock, flags);
1574
1575 return err;
1576}
1577
1578int cedf_kfmlp_close(struct litmus_lock* l)
1579{
1580 struct task_struct *t = current;
1581 struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
1582 struct kfmlp_queue *my_queue;
1583 unsigned long flags;
1584
1585 int owner;
1586
1587 spin_lock_irqsave(&sem->lock, flags);
1588
1589 my_queue = kfmlp_get_queue(sem, t);
1590 owner = (my_queue) ? (my_queue->owner == t) : 0;
1591
1592 spin_unlock_irqrestore(&sem->lock, flags);
1593
1594 if (owner)
1595 cedf_kfmlp_unlock(l);
1596
1597 return 0;
1598}
1599
1600void cedf_kfmlp_free(struct litmus_lock* l)
1601{
1602 struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
1603 kfree(sem->queues);
1604 kfree(sem);
1605}
1606
1607static struct litmus_lock_ops cedf_kfmlp_lock_ops = {
1608 .close = cedf_kfmlp_close,
1609 .lock = cedf_kfmlp_lock,
1610 .unlock = cedf_kfmlp_unlock,
1611 .deallocate = cedf_kfmlp_free,
1612};
1613
1614static struct litmus_lock* cedf_new_kfmlp(void* __user arg, int* ret_code)
1615{
1616 struct kfmlp_semaphore* sem;
1617 int num_resources = 0;
1618 int i;
1619
1620 if(!access_ok(VERIFY_READ, arg, sizeof(num_resources)))
1621 {
1622 *ret_code = -EINVAL;
1623 return(NULL);
1624 }
1625 if(__copy_from_user(&num_resources, arg, sizeof(num_resources)))
1626 {
1627 *ret_code = -EINVAL;
1628 return(NULL);
1629 }
1630 if(num_resources < 1)
1631 {
1632 *ret_code = -EINVAL;
1633 return(NULL);
1634 }
1635
1636 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
1637 if(!sem)
1638 {
1639 *ret_code = -ENOMEM;
1640 return NULL;
1641 }
1642
1643 sem->queues = kmalloc(sizeof(struct kfmlp_queue)*num_resources, GFP_KERNEL);
1644 if(!sem->queues)
1645 {
1646 kfree(sem);
1647 *ret_code = -ENOMEM;
1648 return NULL;
1649 }
1650
1651 sem->litmus_lock.ops = &cedf_kfmlp_lock_ops;
1652 spin_lock_init(&sem->lock);
1653 sem->num_resources = num_resources;
1654
1655 for(i = 0; i < num_resources; ++i)
1656 {
1657 sem->queues[i].owner = NULL;
1658 sem->queues[i].hp_waiter = NULL;
1659 init_waitqueue_head(&sem->queues[i].wait);
1660 sem->queues[i].count = 0;
1661 }
1662
1663 sem->shortest_queue = &sem->queues[0];
1664
1665 *ret_code = 0;
1666 return &sem->litmus_lock;
1667}
1668
1669
1670/* **** lock constructor **** */
1671
1672static long cedf_allocate_lock(struct litmus_lock **lock, int type,
1673 void* __user arg)
1674{
1675 int err = -ENXIO;
1676
1677 /* C-EDF currently only supports the KFMLP for global resources
1678 WITHIN a given cluster. DO NOT USE CROSS-CLUSTER! */
1679 switch (type) {
1680 case KFMLP_SEM:
1681 *lock = cedf_new_kfmlp(arg, &err);
1682 break;
1683 };
1684
1685 return err;
1686}
1687
1688#endif // CONFIG_LITMUS_LOCKING
1689
1690
1691
1692
1693
1694
663 1695 /* total number of cluster */
664 1696 static int num_clusters;
665 1697 /* we do not support cluster of different sizes */
@@ -749,6 +1781,13 @@ static long cedf_activate_plugin(void)
749 1781 INIT_BINHEAP_HANDLE(&(cedf[i].cpu_heap), cpu_lower_prio);
750 1782 edf_domain_init(&(cedf[i].domain), NULL, cedf_release_jobs);
751 1783
1784
1785#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
1786 cedf[i].pending_tasklets.head = NULL;
1787 cedf[i].pending_tasklets.tail = &(cedf[i].pending_tasklets.head);
1788#endif
1789
1790
752 1791 if(!zalloc_cpumask_var(&cedf[i].cpu_map, GFP_ATOMIC))
753 1792 return -ENOMEM;
754 1793 #ifdef CONFIG_RELEASE_MASTER
@@ -807,6 +1846,40 @@ static long cedf_activate_plugin(void)
807 1846 break;
808 1847 }
809 1848 }
1849
1850#ifdef CONFIG_LITMUS_SOFTIRQD
1851 {
1852 /* distribute the daemons evenly across the clusters. */
1853 int* affinity = kmalloc(NR_LITMUS_SOFTIRQD * sizeof(int), GFP_ATOMIC);
1854 int num_daemons_per_cluster = NR_LITMUS_SOFTIRQD / num_clusters;
1855 int left_over = NR_LITMUS_SOFTIRQD % num_clusters;
1856
1857 int daemon = 0;
1858 for(i = 0; i < num_clusters; ++i)
1859 {
1860 int num_on_this_cluster = num_daemons_per_cluster;
1861 if(left_over)
1862 {
1863 ++num_on_this_cluster;
1864 --left_over;
1865 }
1866
1867 for(j = 0; j < num_on_this_cluster; ++j)
1868 {
1869 // first CPU of this cluster
1870 affinity[daemon++] = i*cluster_size;
1871 }
1872 }
1873
1874 spawn_klitirqd(affinity);
1875
1876 kfree(affinity);
1877 }
1878#endif
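
The affinity assignment above is plain integer division with the remainder spread over the first clusters, each daemon pinned to the first CPU of its cluster. A worked example with hypothetical numbers (not taken from this patch):

    /* assume NR_LITMUS_SOFTIRQD = 8, num_clusters = 3, cluster_size = 4 */
    /* num_daemons_per_cluster = 8 / 3 = 2, left_over = 8 % 3 = 2        */
    /* cluster 0: 2 + 1 = 3 daemons -> affinity 0, 0, 0                  */
    /* cluster 1: 2 + 1 = 3 daemons -> affinity 4, 4, 4                  */
    /* cluster 2: 2     = 2 daemons -> affinity 8, 8                     */
    /* affinity[] = { 0, 0, 0, 4, 4, 4, 8, 8 }                           */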
1879
1880#ifdef CONFIG_LITMUS_NVIDIA
1881 init_nvidia_info();
1882#endif
810 1883
811 1884 free_cpumask_var(mask);
812 1885 clusters_allocated = 1;
@@ -826,6 +1899,19 @@ static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = {
826 1899 .task_block = cedf_task_block,
827 1900 .admit_task = cedf_admit_task,
828 1901 .activate_plugin = cedf_activate_plugin,
1902#ifdef CONFIG_LITMUS_LOCKING
1903 .allocate_lock = cedf_allocate_lock,
1904 .set_prio_inh = set_priority_inheritance,
1905 .clear_prio_inh = clear_priority_inheritance,
1906#endif
1907#ifdef CONFIG_LITMUS_SOFTIRQD
1908 .set_prio_inh_klitirqd = set_priority_inheritance_klitirqd,
1909 .clear_prio_inh_klitirqd = clear_priority_inheritance_klitirqd,
1910#endif
1911#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
1912 .enqueue_pai_tasklet = enqueue_pai_tasklet,
1913 .run_tasklets = run_tasklets,
1914#endif
829 1915 };
830 1916
831 1917 static struct proc_dir_entry *cluster_file = NULL, *cedf_dir = NULL;