Diffstat (limited to 'litmus/sched_pfair.c')
-rw-r--r--	litmus/sched_pfair.c	225
1 file changed, 127 insertions(+), 98 deletions(-)
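
The patch below replaces the per-cluster release wheel (an array of PFAIR_MAX_PERIOD bheaps indexed by pfair_time % PFAIR_MAX_PERIOD) with a single release_queue bheap guarded by a new release_lock: pfair_release_jobs() merges newly released jobs into that heap, and poll_releases() merges the whole heap into the ready queue at the next quantum boundary. As a rough illustration of that merge pattern only, here is a user-space sketch; the job struct, the sorted list standing in for the kernel's bheap, and the pthread mutex standing in for raw_spinlock_t are stand-ins invented for this example, not LITMUS^RT code.

/* Sketch only: user-space model of the single release queue.
 * Compile with: gcc -std=c99 -pthread sketch.c */
#include <stdio.h>
#include <pthread.h>

/* stand-in for a job's bheap node; ordered by release time only */
struct job {
	unsigned long release;
	struct job *next;
};

struct cluster {
	pthread_mutex_t release_lock;	/* models raw_spinlock_t release_lock */
	struct job *release_queue;	/* models struct bheap release_queue  */
	struct job *ready_queue;	/* models cluster->pfair.ready_queue  */
};

/* insert in release-time order (crude stand-in for pfair_ready_order) */
static void enqueue(struct job **q, struct job *j)
{
	while (*q && (*q)->release <= j->release)
		q = &(*q)->next;
	j->next = *q;
	*q = j;
}

/* models pfair_release_jobs(): merge newly released jobs into the
 * cluster-wide release queue while holding release_lock */
static void release_jobs(struct cluster *c, struct job *j)
{
	pthread_mutex_lock(&c->release_lock);
	enqueue(&c->release_queue, j);
	pthread_mutex_unlock(&c->release_lock);
}

/* models poll_releases(): at a quantum boundary, drain the release
 * queue into the ready queue (the kernel uses one bheap_union call) */
static void poll_releases(struct cluster *c)
{
	struct job *j;

	pthread_mutex_lock(&c->release_lock);
	while ((j = c->release_queue) != NULL) {
		c->release_queue = j->next;
		enqueue(&c->ready_queue, j);
	}
	pthread_mutex_unlock(&c->release_lock);
}

int main(void)
{
	struct cluster c = { PTHREAD_MUTEX_INITIALIZER, NULL, NULL };
	struct job a = { 3, NULL }, b = { 1, NULL };
	struct job *j;

	release_jobs(&c, &a);
	release_jobs(&c, &b);
	poll_releases(&c);	/* quantum boundary */

	for (j = c.ready_queue; j; j = j->next)
		printf("ready: release=%lu\n", j->release);
	return 0;
}

Dropping the wheel removes the PFAIR_MAX_PERIOD admission limit and the merge_time bookkeeping. The patch also hard-wires early releasing: cur_release() now always returns the job release rather than the per-subtask release, so a task whose next job is already released never leaves the ready queue (the work-conserving case). The second sketch models just the to_relq decision made by advance_subtask(); the field comments and the simplified control flow (no not-present handling, no tracing) are my reading of the code, not part of the patch.

/* Sketch only: models the to_relq decision in advance_subtask(). */
#include <stdio.h>

typedef unsigned long quanta_t;

struct pfair_param {
	quanta_t quanta;	/* execution quanta (subtasks) per job   */
	quanta_t cur;		/* current subtask index                 */
	quanta_t release;	/* release of the current job, in quanta */
	quanta_t period;	/* period, in quanta                     */
};

/* early releasing: only the job release matters; every subtask of the
 * current job is eligible as soon as the job itself is released */
static quanta_t cur_release(struct pfair_param *p)
{
	return p->release;
}

/* returns 1 if the task must wait in the release queue,
 * 0 if it may stay in the ready queue (the work-conserving case) */
static int advance_subtask(quanta_t time, struct pfair_param *p)
{
	p->cur = (p->cur + 1) % p->quanta;
	if (!p->cur)			/* job complete: set up the next one */
		p->release += p->period;
	return cur_release(p) > time;	/* time_after(cur_release(t), time) */
}

int main(void)
{
	/* e.g., a task that needs 3 quanta every 10 quanta */
	struct pfair_param p = { 3, 0, 0, 10 };
	quanta_t t;

	for (t = 0; t < 12; t++) {
		int to_relq = advance_subtask(t, &p);
		printf("t=%2lu cur=%lu release=%2lu to_relq=%d\n",
		       t, p.cur, p.release, to_relq);
	}
	return 0;
}
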
diff --git a/litmus/sched_pfair.c b/litmus/sched_pfair.c
index 0a64273daa47..16f1065bbdca 100644
--- a/litmus/sched_pfair.c
+++ b/litmus/sched_pfair.c
@@ -1,7 +1,8 @@
 /*
  * kernel/sched_pfair.c
  *
- * Implementation of the (global) Pfair scheduling algorithm.
+ * Implementation of the PD^2 pfair scheduling algorithm. This
+ * implementation realizes "early releasing," i.e., it is work-conserving.
  *
  */
 
@@ -76,36 +77,29 @@ struct pfair_state {
 	struct task_struct* local;	/* the local copy of linked	*/
 	struct task_struct* scheduled;	/* what is actually scheduled	*/
 
-	unsigned long missed_quanta;
 	lt_t offset;			/* stagger offset */
+	unsigned int missed_updates;
+	unsigned int missed_quanta;
 };
 
-/* Currently, we limit the maximum period of any task to 2000 quanta.
- * The reason is that it makes the implementation easier since we do not
- * need to reallocate the release wheel on task arrivals.
- * In the future
- */
-#define PFAIR_MAX_PERIOD 2000
-
 struct pfair_cluster {
 	struct scheduling_cluster topology;
 
 	/* The "global" time in this cluster. */
 	quanta_t pfair_time; /* the "official" PFAIR clock */
-	quanta_t merge_time; /* Updated after the release queue has been
-			      * merged. Used by drop_all_references().
-			      */
 
 	/* The ready queue for this cluster. */
 	rt_domain_t pfair;
 
-	/* This is the release queue wheel for this cluster. It is indexed by
-	 * pfair_time % PFAIR_MAX_PERIOD. Each heap is ordered by PFAIR
-	 * priority, so that it can be merged with the ready queue.
+	/* The set of jobs that should have their release enacted at the next
+	 * quantum boundary.
 	 */
-	struct bheap release_queue[PFAIR_MAX_PERIOD];
+	struct bheap release_queue;
+	raw_spinlock_t release_lock;
 };
 
+#define RT_F_REQUEUE 0x2
+
 static inline struct pfair_cluster* cpu_cluster(struct pfair_state* state)
 {
 	return container_of(state->topology.cluster, struct pfair_cluster, topology);
@@ -121,6 +115,11 @@ static inline struct pfair_state* from_cluster_list(struct list_head* pos)
 	return list_entry(pos, struct pfair_state, topology.cluster_list);
 }
 
+static inline struct pfair_cluster* from_domain(rt_domain_t* rt)
+{
+	return container_of(rt, struct pfair_cluster, pfair);
+}
+
 static inline raw_spinlock_t* cluster_lock(struct pfair_cluster* cluster)
 {
 	/* The ready_lock is used to serialize all scheduling events. */
@@ -161,21 +160,11 @@ static quanta_t cur_deadline(struct task_struct* t)
 	return cur_subtask(t)->deadline + tsk_pfair(t)->release;
 }
 
-
-static quanta_t cur_sub_release(struct task_struct* t)
-{
-	return cur_subtask(t)->release + tsk_pfair(t)->release;
-}
-
 static quanta_t cur_release(struct task_struct* t)
 {
-#ifdef EARLY_RELEASE
-	/* only the release of the first subtask counts when we early
-	 * release */
+	/* This is early releasing: only the release of the first subtask
+	 * counts. */
 	return tsk_pfair(t)->release;
-#else
-	return cur_sub_release(t);
-#endif
 }
 
 static quanta_t cur_overlap(struct task_struct* t)
@@ -235,11 +224,16 @@ int pfair_ready_order(struct bheap_node* a, struct bheap_node* b)
 	return pfair_higher_prio(bheap2task(a), bheap2task(b));
 }
 
-/* return the proper release queue for time t */
-static struct bheap* relq(struct pfair_cluster* cluster, quanta_t t)
+static void pfair_release_jobs(rt_domain_t* rt, struct bheap* tasks)
 {
-	struct bheap* rq = cluster->release_queue + (t % PFAIR_MAX_PERIOD);
-	return rq;
+	struct pfair_cluster* cluster = from_domain(rt);
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&cluster->release_lock, flags);
+
+	bheap_union(pfair_ready_order, &cluster->release_queue, tasks);
+
+	raw_spin_unlock_irqrestore(&cluster->release_lock, flags);
 }
 
 static void prepare_release(struct task_struct* t, quanta_t at)
@@ -248,25 +242,12 @@ static void prepare_release(struct task_struct* t, quanta_t at)
 	tsk_pfair(t)->cur = 0;
 }
 
-static void __pfair_add_release(struct task_struct* t, struct bheap* queue)
-{
-	bheap_insert(pfair_ready_order, queue,
-		     tsk_rt(t)->heap_node);
-}
-
-static void pfair_add_release(struct pfair_cluster* cluster,
-			      struct task_struct* t)
-{
-	BUG_ON(bheap_node_in_heap(tsk_rt(t)->heap_node));
-	__pfair_add_release(t, relq(cluster, cur_release(t)));
-}
-
 /* pull released tasks from the release queue */
-static void poll_releases(struct pfair_cluster* cluster,
-			  quanta_t time)
+static void poll_releases(struct pfair_cluster* cluster)
 {
-	__merge_ready(&cluster->pfair, relq(cluster, time));
-	cluster->merge_time = time;
+	raw_spin_lock(&cluster->release_lock);
+	__merge_ready(&cluster->pfair, &cluster->release_queue);
+	raw_spin_unlock(&cluster->release_lock);
 }
 
 static void check_preempt(struct task_struct* t)
@@ -292,16 +273,12 @@ static void drop_all_references(struct task_struct *t)
 {
 	int cpu;
 	struct pfair_state* s;
-	struct bheap* q;
 	struct pfair_cluster* cluster;
 	if (bheap_node_in_heap(tsk_rt(t)->heap_node)) {
-		/* figure out what queue the node is in */
+		/* It must be in the ready queue; drop references isn't called
+		 * when the job is in a release queue. */
 		cluster = tsk_pfair(t)->cluster;
-		if (time_before_eq(cur_release(t), cluster->merge_time))
-			q = &cluster->pfair.ready_queue;
-		else
-			q = relq(cluster, cur_release(t));
-		bheap_delete(pfair_ready_order, q,
+		bheap_delete(pfair_ready_order, &cluster->pfair.ready_queue,
 			     tsk_rt(t)->heap_node);
 	}
 	for (cpu = 0; cpu < num_online_cpus(); cpu++) {
@@ -313,6 +290,17 @@ static void drop_all_references(struct task_struct *t)
 		if (s->scheduled == t)
 			s->scheduled = NULL;
 	}
+	/* make sure we don't have a stale linked_on field */
+	tsk_rt(t)->linked_on = NO_CPU;
+}
+
+static void pfair_prepare_next_period(struct task_struct* t)
+{
+	struct pfair_param* p = tsk_pfair(t);
+
+	prepare_for_next_period(t);
+	get_rt_flags(t) = RT_F_RUNNING;
+	p->release += p->period;
 }
 
 /* returns 1 if the task needs to go the release queue */
@@ -322,30 +310,26 @@ static int advance_subtask(quanta_t time, struct task_struct* t, int cpu)
 	int to_relq;
 	p->cur = (p->cur + 1) % p->quanta;
 	if (!p->cur) {
-		sched_trace_task_completion(t, 1);
 		if (tsk_rt(t)->present) {
-			/* we start a new job */
-			prepare_for_next_period(t);
-			sched_trace_task_release(t);
-			get_rt_flags(t) = RT_F_RUNNING;
-			p->release += p->period;
+			/* The job overran; we start a new budget allocation. */
+			pfair_prepare_next_period(t);
 		} else {
 			/* remove task from system until it wakes */
 			drop_all_references(t);
+			tsk_rt(t)->flags = RT_F_REQUEUE;
 			TRACE_TASK(t, "on %d advanced to subtask %lu (not present)\n",
 				   cpu, p->cur);
 			return 0;
 		}
 	}
 	to_relq = time_after(cur_release(t), time);
-	TRACE_TASK(t, "on %d advanced to subtask %lu -> to_relq=%d\n",
-		   cpu, p->cur, to_relq);
+	TRACE_TASK(t, "on %d advanced to subtask %lu -> to_relq=%d (cur_release:%lu time:%lu)\n",
+		   cpu, p->cur, to_relq, cur_release(t), time);
 	return to_relq;
 }
 
 static void advance_subtasks(struct pfair_cluster *cluster, quanta_t time)
 {
-	int missed;
 	struct task_struct* l;
 	struct pfair_param* p;
 	struct list_head* pos;
@@ -354,14 +338,17 @@ static void advance_subtasks(struct pfair_cluster *cluster, quanta_t time)
 	list_for_each(pos, &cluster->topology.cpus) {
 		cpu = from_cluster_list(pos);
 		l = cpu->linked;
-		missed = cpu->linked != cpu->local;
+		cpu->missed_updates += cpu->linked != cpu->local;
 		if (l) {
 			p = tsk_pfair(l);
 			p->last_quantum = time;
 			p->last_cpu = cpu_id(cpu);
 			if (advance_subtask(time, l, cpu_id(cpu))) {
-				cpu->linked = NULL;
-				pfair_add_release(cluster, l);
+				//cpu->linked = NULL;
+				PTRACE_TASK(l, "should go to release queue. "
+					    "scheduled_on=%d present=%d\n",
+					    tsk_rt(l)->scheduled_on,
+					    tsk_rt(l)->present);
 			}
 		}
 	}
@@ -445,6 +432,11 @@ static void schedule_subtasks(struct pfair_cluster *cluster, quanta_t time)
 	list_for_each(pos, &cluster->topology.cpus) {
 		cpu_state = from_cluster_list(pos);
 		retry = 1;
+#ifdef CONFIG_RELEASE_MASTER
+		/* skip release master */
+		if (cluster->pfair.release_master == cpu_id(cpu_state))
+			continue;
+#endif
 		while (retry) {
 			if (pfair_higher_prio(__peek_ready(&cluster->pfair),
 					      cpu_state->linked))
@@ -471,13 +463,13 @@ static void schedule_next_quantum(struct pfair_cluster *cluster, quanta_t time)
 	sched_trace_quantum_boundary();
 
 	advance_subtasks(cluster, time);
-	poll_releases(cluster, time);
+	poll_releases(cluster);
 	schedule_subtasks(cluster, time);
 
 	list_for_each(pos, &cluster->topology.cpus) {
 		cpu = from_cluster_list(pos);
 		if (cpu->linked)
-			PTRACE_TASK(pstate[cpu]->linked,
+			PTRACE_TASK(cpu->linked,
 				    " linked on %d.\n", cpu_id(cpu));
 		else
 			PTRACE("(null) linked on %d.\n", cpu_id(cpu));
@@ -612,12 +604,42 @@ static int safe_to_schedule(struct task_struct* t, int cpu)
 static struct task_struct* pfair_schedule(struct task_struct * prev)
 {
 	struct pfair_state* state = &__get_cpu_var(pfair_state);
-	int blocks;
+	struct pfair_cluster* cluster = cpu_cluster(state);
+	int blocks, completion, out_of_time;
 	struct task_struct* next = NULL;
 
+#ifdef CONFIG_RELEASE_MASTER
+	/* Bail out early if we are the release master.
+	 * The release master never schedules any real-time tasks.
+	 */
+	if (unlikely(cluster->pfair.release_master == cpu_id(state))) {
+		sched_state_task_picked();
+		return NULL;
+	}
+#endif
+
 	raw_spin_lock(cpu_lock(state));
 
 	blocks = is_realtime(prev) && !is_running(prev);
+	completion = is_realtime(prev) && get_rt_flags(prev) == RT_F_SLEEP;
+	out_of_time = is_realtime(prev) && time_after(cur_release(prev),
+						      state->local_tick);
+
+	if (is_realtime(prev))
+		PTRACE_TASK(prev, "blocks:%d completion:%d out_of_time:%d\n",
+			    blocks, completion, out_of_time);
+
+	if (completion) {
+		sched_trace_task_completion(prev, 0);
+		pfair_prepare_next_period(prev);
+		prepare_release(prev, cur_release(prev));
+	}
+
+	if (!blocks && (completion || out_of_time)) {
+		drop_all_references(prev);
+		sched_trace_task_release(prev);
+		add_release(&cluster->pfair, prev);
+	}
 
 	if (state->local && safe_to_schedule(state->local, cpu_id(state)))
 		next = state->local;
@@ -649,13 +671,19 @@ static void pfair_task_new(struct task_struct * t, int on_rq, int running)
 	cluster = tsk_pfair(t)->cluster;
 
 	raw_spin_lock_irqsave(cluster_lock(cluster), flags);
-	if (running)
-		t->rt_param.scheduled_on = task_cpu(t);
-	else
-		t->rt_param.scheduled_on = NO_CPU;
 
 	prepare_release(t, cluster->pfair_time + 1);
-	pfair_add_release(cluster, t);
+
+	t->rt_param.scheduled_on = NO_CPU;
+
+	if (running) {
+#ifdef CONFIG_RELEASE_MASTER
+		if (task_cpu(t) != cluster->pfair.release_master)
+#endif
+			t->rt_param.scheduled_on = task_cpu(t);
+		__add_ready(&cluster->pfair, t);
+	}
+
 	check_preempt(t);
 
 	raw_spin_unlock_irqrestore(cluster_lock(cluster), flags);
@@ -665,6 +693,7 @@ static void pfair_task_wake_up(struct task_struct *t)
 {
 	unsigned long flags;
 	lt_t now;
+	int requeue = 0;
 	struct pfair_cluster* cluster;
 
 	cluster = tsk_pfair(t)->cluster;
@@ -679,13 +708,20 @@ static void pfair_task_wake_up(struct task_struct *t)
 	 * (as if it never blocked at all). Otherwise, we have a
 	 * new sporadic job release.
 	 */
+	requeue = tsk_rt(t)->flags == RT_F_REQUEUE;
 	now = litmus_clock();
 	if (lt_before(get_deadline(t), now)) {
+		TRACE_TASK(t, "sporadic release!\n");
 		release_at(t, now);
 		prepare_release(t, time2quanta(now, CEIL));
 		sched_trace_task_release(t);
-		/* FIXME: race with pfair_time advancing */
-		pfair_add_release(cluster, t);
+	}
+
+	/* only add to ready queue if the task isn't still linked somewhere */
+	if (requeue) {
+		TRACE_TASK(t, "requeueing required\n");
+		tsk_rt(t)->flags = RT_F_RUNNING;
+		__add_ready(&cluster->pfair, t);
 	}
 
 	check_preempt(t);
@@ -744,15 +780,11 @@ static void pfair_release_at(struct task_struct* task, lt_t start)
 	release_at(task, start);
 	release = time2quanta(start, CEIL);
 
-	/* FIXME: support arbitrary offsets. */
-	if (release - cluster->pfair_time >= PFAIR_MAX_PERIOD)
-		release = cluster->pfair_time + PFAIR_MAX_PERIOD;
-
 	TRACE_TASK(task, "sys release at %lu\n", release);
 
 	drop_all_references(task);
 	prepare_release(task, release);
-	pfair_add_release(cluster, task);
+	add_release(&cluster->pfair, task);
 
 	raw_spin_unlock_irqrestore(cluster_lock(cluster), flags);
 }
@@ -834,13 +866,6 @@ static long pfair_admit_task(struct task_struct* t)
834 "The period of %s/%d is not a multiple of %llu.\n", 866 "The period of %s/%d is not a multiple of %llu.\n",
835 t->comm, t->pid, (unsigned long long) quantum_length); 867 t->comm, t->pid, (unsigned long long) quantum_length);
836 868
837 if (period >= PFAIR_MAX_PERIOD) {
838 printk(KERN_WARNING
839 "PFAIR: Rejecting task %s/%d; its period is too long.\n",
840 t->comm, t->pid);
841 return -EINVAL;
842 }
843
844 if (quanta == period) { 869 if (quanta == period) {
845 /* special case: task has weight 1.0 */ 870 /* special case: task has weight 1.0 */
846 printk(KERN_INFO 871 printk(KERN_INFO
@@ -880,12 +905,9 @@ static long pfair_admit_task(struct task_struct* t)
 
 static void pfair_init_cluster(struct pfair_cluster* cluster)
 {
-	int i;
-
-	/* initialize release queue */
-	for (i = 0; i < PFAIR_MAX_PERIOD; i++)
-		bheap_init(&cluster->release_queue[i]);
-	rt_domain_init(&cluster->pfair, pfair_ready_order, NULL, NULL);
+	rt_domain_init(&cluster->pfair, pfair_ready_order, NULL, pfair_release_jobs);
+	bheap_init(&cluster->release_queue);
+	raw_spin_lock_init(&cluster->release_lock);
 	INIT_LIST_HEAD(&cluster->topology.cpus);
 }
 
@@ -899,8 +921,11 @@ static void cleanup_clusters(void)
 	num_pfair_clusters = 0;
 
 	/* avoid stale pointers */
-	for (i = 0; i < NR_CPUS; i++)
+	for (i = 0; i < num_online_cpus(); i++) {
 		pstate[i]->topology.cluster = NULL;
+		printk("P%d missed %u updates and %u quanta.\n", cpu_id(pstate[i]),
+		       pstate[i]->missed_updates, pstate[i]->missed_quanta);
+	}
 }
 
 static long pfair_activate_plugin(void)
@@ -936,6 +961,9 @@ static long pfair_activate_plugin(void)
 		pfair_init_cluster(cluster);
 		cluster->pfair_time = now;
 		clust[i] = &cluster->topology;
+#ifdef CONFIG_RELEASE_MASTER
+		cluster->pfair.release_master = atomic_read(&release_master_cpu);
+#endif
 	}
 
 	for (i = 0; i < num_online_cpus(); i++) {
@@ -943,6 +971,7 @@ static long pfair_activate_plugin(void)
 		state->cur_tick = now;
 		state->local_tick = now;
 		state->missed_quanta = 0;
+		state->missed_updates = 0;
 		state->offset = cpu_stagger_offset(i);
 		printk(KERN_ERR "cpus[%d] set; %d\n", i, num_online_cpus());
 		cpus[i] = &state->topology;