author    Bjoern B. Brandenburg <bbb@cs.unc.edu>  2009-12-07 15:19:05 -0500
committer Bjoern B. Brandenburg <bbb@cs.unc.edu>  2009-12-07 15:19:05 -0500
commit    abbda9b07ef3cf30d925be5f3fbd3118db0b1984 (patch)
tree      06aca9665019e1cc42c746e8db2c5ecb74c63687
parent    e6474fd5f0504534120d7b104b7deb461f1c8bb6 (diff)
Remove PFAIR and P-EDF plugins; will be ported later.
-rw-r--r--  litmus/Makefile          4
-rw-r--r--  litmus/sched_pfair.c   882
-rw-r--r--  litmus/sched_psn_edf.c 454
3 files changed, 1 insertion(+), 1339 deletions(-)
diff --git a/litmus/Makefile b/litmus/Makefile
index f7c7e02fd9..763f28c3c2 100644
--- a/litmus/Makefile
+++ b/litmus/Makefile
@@ -7,9 +7,7 @@ obj-y = sched_plugin.o litmus.o \
 	rt_domain.o fdso.o sync.o \
 	fmlp.o srp.o \
 	heap.o \
-	sched_gsn_edf.o \
-	sched_psn_edf.o \
-	sched_pfair.o
+	sched_gsn_edf.o
 
 obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o
 obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o
diff --git a/litmus/sched_pfair.c b/litmus/sched_pfair.c
deleted file mode 100644
index f623e0e11a..0000000000
--- a/litmus/sched_pfair.c
+++ /dev/null
@@ -1,882 +0,0 @@
1/*
2 * kernel/sched_pfair.c
3 *
4 * Implementation of the (global) Pfair scheduling algorithm.
5 *
6 */
7
8#include <asm/div64.h>
9#include <linux/delay.h>
10#include <linux/module.h>
11#include <linux/spinlock.h>
12#include <linux/percpu.h>
13#include <linux/sched.h>
14#include <linux/list.h>
15
16#include <litmus/litmus.h>
17#include <litmus/jobs.h>
18#include <litmus/rt_domain.h>
19#include <litmus/sched_plugin.h>
20#include <litmus/sched_trace.h>
21
22#include <litmus/heap.h>
23
24struct subtask {
25 /* measured in quanta relative to job release */
26 quanta_t release;
27 quanta_t deadline;
28 quanta_t overlap; /* called "b bit" by PD^2 */
29 quanta_t group_deadline;
30};
31
32struct pfair_param {
33 quanta_t quanta; /* number of subtasks */
34 quanta_t cur; /* index of current subtask */
35
36 quanta_t release; /* in quanta */
37 quanta_t period; /* in quanta */
38
39 quanta_t last_quantum; /* when scheduled last */
40 int last_cpu; /* where scheduled last */
41
42 unsigned int sporadic_release; /* On wakeup, new sporadic release? */
43
44 struct subtask subtasks[0]; /* allocate together with pfair_param */
45};
46
47#define tsk_pfair(tsk) ((tsk)->rt_param.pfair)
48
49struct pfair_state {
50 int cpu;
51 volatile quanta_t cur_tick; /* updated by the CPU that is advancing
52 * the time */
53 volatile quanta_t local_tick; /* What tick is the local CPU currently
54 * executing? Updated only by the local
55 * CPU. In QEMU, this may lag behind the
56 * current tick. In a real system, with
57 * proper timers and aligned quanta,
58 * that should only be the
59 * case for a very short time after the
60 * time advanced. With staggered quanta,
61 * it will lag for the duration of the
62 * offset.
63 */
64
65 struct task_struct* linked; /* the task that should be executing */
66 struct task_struct* local; /* the local copy of linked */
67 struct task_struct* scheduled; /* what is actually scheduled */
68
69 unsigned long missed_quanta;
70 lt_t offset; /* stagger offset */
71};
72
73/* Currently, we limit the maximum period of any task to 2000 quanta.
74 * The reason is that it makes the implementation easier since we do not
75 * need to reallocate the release wheel on task arrivals.
76 * In the future
77 */
78#define PFAIR_MAX_PERIOD 2000
79
80/* This is the release queue wheel. It is indexed by pfair_time %
81 * PFAIR_MAX_PERIOD. Each heap is ordered by PFAIR priority, so that it can be
82 * merged with the ready queue.
83 */
84static struct heap release_queue[PFAIR_MAX_PERIOD];
85
86DEFINE_PER_CPU(struct pfair_state, pfair_state);
87struct pfair_state* pstate[NR_CPUS]; /* short cut */
88
89static quanta_t pfair_time = 0; /* the "official" PFAIR clock */
90static quanta_t merge_time = 0; /* Updated after the release queue has been
91 * merged. Used by drop_all_references().
92 */
93
94static rt_domain_t pfair;
95
96/* The pfair_lock is used to serialize all scheduling events.
97 */
98#define pfair_lock pfair.ready_lock
99
100/* Enable for lots of trace info.
101 * #define PFAIR_DEBUG
102 */
103
104#ifdef PFAIR_DEBUG
105#define PTRACE_TASK(t, f, args...) TRACE_TASK(t, f, ## args)
106#define PTRACE(f, args...) TRACE(f, ## args)
107#else
108#define PTRACE_TASK(t, f, args...)
109#define PTRACE(f, args...)
110#endif
111
112/* gcc will inline all of these accessor functions... */
113static struct subtask* cur_subtask(struct task_struct* t)
114{
115 return tsk_pfair(t)->subtasks + tsk_pfair(t)->cur;
116}
117
118static quanta_t cur_deadline(struct task_struct* t)
119{
120 return cur_subtask(t)->deadline + tsk_pfair(t)->release;
121}
122
123
124static quanta_t cur_sub_release(struct task_struct* t)
125{
126 return cur_subtask(t)->release + tsk_pfair(t)->release;
127}
128
129static quanta_t cur_release(struct task_struct* t)
130{
131#ifdef EARLY_RELEASE
132 /* only the release of the first subtask counts when we early
133 * release */
134 return tsk_pfair(t)->release;
135#else
136 return cur_sub_release(t);
137#endif
138}
139
140static quanta_t cur_overlap(struct task_struct* t)
141{
142 return cur_subtask(t)->overlap;
143}
144
145static quanta_t cur_group_deadline(struct task_struct* t)
146{
147 quanta_t gdl = cur_subtask(t)->group_deadline;
148 if (gdl)
149 return gdl + tsk_pfair(t)->release;
150 else
151 return gdl;
152}
153
154
155static int pfair_higher_prio(struct task_struct* first,
156 struct task_struct* second)
157{
158 return /* first task must exist */
159 first && (
160 /* Does the second task exist and is it a real-time task? If
161 * not, the first task (which is a RT task) has higher
162 * priority.
163 */
164 !second || !is_realtime(second) ||
165
166 /* Is the (subtask) deadline of the first task earlier?
167 * Then it has higher priority.
168 */
169 time_before(cur_deadline(first), cur_deadline(second)) ||
170
171 /* Do we have a deadline tie?
172 * Then break by B-bit.
173 */
174 (cur_deadline(first) == cur_deadline(second) &&
175 (cur_overlap(first) > cur_overlap(second) ||
176
177 /* Do we have a B-bit tie?
178 * Then break by group deadline.
179 */
180 (cur_overlap(first) == cur_overlap(second) &&
181 (time_after(cur_group_deadline(first),
182 cur_group_deadline(second)) ||
183
184 /* Do we have a group deadline tie?
185 * Then break by PID, which are unique.
186 */
187 (cur_group_deadline(first) ==
188 cur_group_deadline(second) &&
189 first->pid < second->pid))))));
190}
191
192int pfair_ready_order(struct heap_node* a, struct heap_node* b)
193{
194 return pfair_higher_prio(heap2task(a), heap2task(b));
195}
196
197/* return the proper release queue for time t */
198static struct heap* relq(quanta_t t)
199{
200 struct heap* rq = &release_queue[t % PFAIR_MAX_PERIOD];
201 return rq;
202}
203
204static void prepare_release(struct task_struct* t, quanta_t at)
205{
206 tsk_pfair(t)->release = at;
207 tsk_pfair(t)->cur = 0;
208}
209
210static void __pfair_add_release(struct task_struct* t, struct heap* queue)
211{
212 heap_insert(pfair_ready_order, queue,
213 tsk_rt(t)->heap_node);
214}
215
216static void pfair_add_release(struct task_struct* t)
217{
218 BUG_ON(heap_node_in_heap(tsk_rt(t)->heap_node));
219 __pfair_add_release(t, relq(cur_release(t)));
220}
221
222/* pull released tasks from the release queue */
223static void poll_releases(quanta_t time)
224{
225 __merge_ready(&pfair, relq(time));
226 merge_time = time;
227}
228
229static void check_preempt(struct task_struct* t)
230{
231 int cpu = NO_CPU;
232 if (tsk_rt(t)->linked_on != tsk_rt(t)->scheduled_on &&
233 tsk_rt(t)->present) {
234 /* the task can be scheduled and
235 * is not scheduled where it ought to be scheduled
236 */
237 cpu = tsk_rt(t)->linked_on != NO_CPU ?
238 tsk_rt(t)->linked_on :
239 tsk_rt(t)->scheduled_on;
240 PTRACE_TASK(t, "linked_on:%d, scheduled_on:%d\n",
241 tsk_rt(t)->linked_on, tsk_rt(t)->scheduled_on);
242 /* preempt */
243 if (cpu == smp_processor_id())
244 set_tsk_need_resched(current);
245 else {
246 smp_send_reschedule(cpu);
247 }
248 }
249}
250
251/* caller must hold pfair_lock */
252static void drop_all_references(struct task_struct *t)
253{
254 int cpu;
255 struct pfair_state* s;
256 struct heap* q;
257 if (heap_node_in_heap(tsk_rt(t)->heap_node)) {
258 /* figure out what queue the node is in */
259 if (time_before_eq(cur_release(t), merge_time))
260 q = &pfair.ready_queue;
261 else
262 q = relq(cur_release(t));
263 heap_delete(pfair_ready_order, q,
264 tsk_rt(t)->heap_node);
265 }
266 for (cpu = 0; cpu < NR_CPUS; cpu++) {
267 s = &per_cpu(pfair_state, cpu);
268 if (s->linked == t)
269 s->linked = NULL;
270 if (s->local == t)
271 s->local = NULL;
272 if (s->scheduled == t)
273 s->scheduled = NULL;
274 }
275}
276
277/* returns 1 if the task needs to go the release queue */
278static int advance_subtask(quanta_t time, struct task_struct* t, int cpu)
279{
280 struct pfair_param* p = tsk_pfair(t);
281 int to_relq;
282 p->cur = (p->cur + 1) % p->quanta;
283 if (!p->cur) {
284 sched_trace_task_completion(t, 1);
285 if (tsk_rt(t)->present) {
286 /* we start a new job */
287 prepare_for_next_period(t);
288 sched_trace_task_release(t);
289 get_rt_flags(t) = RT_F_RUNNING;
290 p->release += p->period;
291 } else {
292 /* remove task from system until it wakes */
293 drop_all_references(t);
294 tsk_pfair(t)->sporadic_release = 1;
295 TRACE_TASK(t, "on %d advanced to subtask %lu (not present)\n",
296 cpu, p->cur);
297 return 0;
298 }
299 }
300 to_relq = time_after(cur_release(t), time);
301 TRACE_TASK(t, "on %d advanced to subtask %lu -> to_relq=%d\n",
302 cpu, p->cur, to_relq);
303 return to_relq;
304}
305
306static void advance_subtasks(quanta_t time)
307{
308 int cpu, missed;
309 struct task_struct* l;
310 struct pfair_param* p;
311
312 for_each_online_cpu(cpu) {
313 l = pstate[cpu]->linked;
314 missed = pstate[cpu]->linked != pstate[cpu]->local;
315 if (l) {
316 p = tsk_pfair(l);
317 p->last_quantum = time;
318 p->last_cpu = cpu;
319 if (advance_subtask(time, l, cpu)) {
320 pstate[cpu]->linked = NULL;
321 pfair_add_release(l);
322 }
323 }
324 }
325}
326
327static int target_cpu(quanta_t time, struct task_struct* t, int default_cpu)
328{
329 int cpu;
330 if (tsk_rt(t)->scheduled_on != NO_CPU) {
331 /* always observe scheduled_on linkage */
332 default_cpu = tsk_rt(t)->scheduled_on;
333 } else if (tsk_pfair(t)->last_quantum == time - 1) {
334 /* back2back quanta */
335 /* Only observe last_quantum if no scheduled_on is in the way.
336 * This should only kick in if a CPU missed quanta, and that
337 * *should* only happen in QEMU.
338 */
339 cpu = tsk_pfair(t)->last_cpu;
340 if (!pstate[cpu]->linked ||
341 tsk_rt(pstate[cpu]->linked)->scheduled_on != cpu) {
342 default_cpu = cpu;
343 }
344 }
345 return default_cpu;
346}
347
348/* returns one if linking was redirected */
349static int pfair_link(quanta_t time, int cpu,
350 struct task_struct* t)
351{
352 int target = target_cpu(time, t, cpu);
353 struct task_struct* prev = pstate[cpu]->linked;
354 struct task_struct* other;
355
356 if (target != cpu) {
357 other = pstate[target]->linked;
358 pstate[target]->linked = t;
359 tsk_rt(t)->linked_on = target;
360 if (!other)
361 /* linked ok, but reschedule this CPU */
362 return 1;
363 if (target < cpu) {
364 /* link other to cpu instead */
365 tsk_rt(other)->linked_on = cpu;
366 pstate[cpu]->linked = other;
367 if (prev) {
368 /* prev got pushed back into the ready queue */
369 tsk_rt(prev)->linked_on = NO_CPU;
370 __add_ready(&pfair, prev);
371 }
372 /* we are done with this cpu */
373 return 0;
374 } else {
 375 /* re-add other, its original CPU was not considered yet */
376 tsk_rt(other)->linked_on = NO_CPU;
377 __add_ready(&pfair, other);
378 /* reschedule this CPU */
379 return 1;
380 }
381 } else {
382 pstate[cpu]->linked = t;
383 tsk_rt(t)->linked_on = cpu;
384 if (prev) {
385 /* prev got pushed back into the ready queue */
386 tsk_rt(prev)->linked_on = NO_CPU;
387 __add_ready(&pfair, prev);
388 }
389 /* we are done with this CPU */
390 return 0;
391 }
392}
393
394static void schedule_subtasks(quanta_t time)
395{
396 int cpu, retry;
397
398 for_each_online_cpu(cpu) {
399 retry = 1;
400 while (retry) {
401 if (pfair_higher_prio(__peek_ready(&pfair),
402 pstate[cpu]->linked))
403 retry = pfair_link(time, cpu,
404 __take_ready(&pfair));
405 else
406 retry = 0;
407 }
408 }
409}
410
411static void schedule_next_quantum(quanta_t time)
412{
413 int cpu;
414
415 /* called with interrupts disabled */
416 PTRACE("--- Q %lu at %llu PRE-SPIN\n",
417 time, litmus_clock());
418 spin_lock(&pfair_lock);
419 PTRACE("<<< Q %lu at %llu\n",
420 time, litmus_clock());
421
422 sched_trace_quantum_boundary();
423
424 advance_subtasks(time);
425 poll_releases(time);
426 schedule_subtasks(time);
427
428 for (cpu = 0; cpu < NR_CPUS; cpu++)
429 if (pstate[cpu]->linked)
430 PTRACE_TASK(pstate[cpu]->linked,
431 " linked on %d.\n", cpu);
432 else
433 PTRACE("(null) linked on %d.\n", cpu);
434
435 /* We are done. Advance time. */
436 mb();
437 for (cpu = 0; cpu < NR_CPUS; cpu++) {
438 if (pstate[cpu]->local_tick != pstate[cpu]->cur_tick) {
439 TRACE("BAD Quantum not acked on %d "
440 "(l:%lu c:%lu p:%lu)\n",
441 cpu,
442 pstate[cpu]->local_tick,
443 pstate[cpu]->cur_tick,
444 pfair_time);
445 pstate[cpu]->missed_quanta++;
446 }
447 pstate[cpu]->cur_tick = time;
448 }
449 PTRACE(">>> Q %lu at %llu\n",
450 time, litmus_clock());
451 spin_unlock(&pfair_lock);
452}
453
454static noinline void wait_for_quantum(quanta_t q, struct pfair_state* state)
455{
456 quanta_t loc;
457
458 goto first; /* skip mb() on first iteration */
459 do {
460 cpu_relax();
461 mb();
462 first: loc = state->cur_tick;
 463 /* FIXME: what if loc > q? */
464 } while (time_before(loc, q));
465 PTRACE("observed cur_tick:%lu >= q:%lu\n",
466 loc, q);
467}
468
469static quanta_t current_quantum(struct pfair_state* state)
470{
471 lt_t t = litmus_clock() - state->offset;
472 return time2quanta(t, FLOOR);
473}
474
475static void catchup_quanta(quanta_t from, quanta_t target,
476 struct pfair_state* state)
477{
478 quanta_t cur = from, time;
479 TRACE("+++< BAD catching up quanta from %lu to %lu\n",
480 from, target);
481 while (time_before(cur, target)) {
482 wait_for_quantum(cur, state);
483 cur++;
484 time = cmpxchg(&pfair_time,
485 cur - 1, /* expected */
486 cur /* next */
487 );
488 if (time == cur - 1)
489 schedule_next_quantum(cur);
490 }
491 TRACE("+++> catching up done\n");
492}
493
494/* pfair_tick - this function is called for every local timer
495 * interrupt.
496 */
497static void pfair_tick(struct task_struct* t)
498{
499 struct pfair_state* state = &__get_cpu_var(pfair_state);
500 quanta_t time, cur;
501 int retry = 10;
502
503 do {
504 cur = current_quantum(state);
505 PTRACE("q %lu at %llu\n", cur, litmus_clock());
506
507 /* Attempt to advance time. First CPU to get here
508 * will prepare the next quantum.
509 */
510 time = cmpxchg(&pfair_time,
511 cur - 1, /* expected */
512 cur /* next */
513 );
514 if (time == cur - 1) {
515 /* exchange succeeded */
516 wait_for_quantum(cur - 1, state);
517 schedule_next_quantum(cur);
518 retry = 0;
519 } else if (time_before(time, cur - 1)) {
520 /* the whole system missed a tick !? */
521 catchup_quanta(time, cur, state);
522 retry--;
523 } else if (time_after(time, cur)) {
524 /* our timer lagging behind!? */
525 TRACE("BAD pfair_time:%lu > cur:%lu\n", time, cur);
526 retry--;
527 } else {
528 /* Some other CPU already started scheduling
529 * this quantum. Let it do its job and then update.
530 */
531 retry = 0;
532 }
533 } while (retry);
534
535 /* Spin locally until time advances. */
536 wait_for_quantum(cur, state);
537
538 /* copy assignment */
539 /* FIXME: what if we race with a future update? Corrupted state? */
540 state->local = state->linked;
541 /* signal that we are done */
542 mb();
543 state->local_tick = state->cur_tick;
544
545 if (state->local != current
546 && (is_realtime(current) || is_present(state->local)))
547 set_tsk_need_resched(current);
548}
549
550static int safe_to_schedule(struct task_struct* t, int cpu)
551{
552 int where = tsk_rt(t)->scheduled_on;
553 if (where != NO_CPU && where != cpu) {
554 TRACE_TASK(t, "BAD: can't be scheduled on %d, "
555 "scheduled already on %d.\n", cpu, where);
556 return 0;
557 } else
558 return tsk_rt(t)->present && get_rt_flags(t) == RT_F_RUNNING;
559}
560
561static struct task_struct* pfair_schedule(struct task_struct * prev)
562{
563 struct pfair_state* state = &__get_cpu_var(pfair_state);
564 int blocks;
565 struct task_struct* next = NULL;
566
567 spin_lock(&pfair_lock);
568
569 blocks = is_realtime(prev) && !is_running(prev);
570
571 if (state->local && safe_to_schedule(state->local, state->cpu))
572 next = state->local;
573
574 if (prev != next) {
575 tsk_rt(prev)->scheduled_on = NO_CPU;
576 if (next)
577 tsk_rt(next)->scheduled_on = state->cpu;
578 }
579
580 spin_unlock(&pfair_lock);
581
582 if (next)
583 TRACE_TASK(next, "scheduled rel=%lu at %lu (%llu)\n",
584 tsk_pfair(next)->release, pfair_time, litmus_clock());
585 else if (is_realtime(prev))
586 TRACE("Becomes idle at %lu (%llu)\n", pfair_time, litmus_clock());
587
588 return next;
589}
590
591static void pfair_task_new(struct task_struct * t, int on_rq, int running)
592{
593 unsigned long flags;
594
595 TRACE("pfair: task new %d state:%d\n", t->pid, t->state);
596
597 spin_lock_irqsave(&pfair_lock, flags);
598 if (running)
599 t->rt_param.scheduled_on = task_cpu(t);
600 else
601 t->rt_param.scheduled_on = NO_CPU;
602
603 prepare_release(t, pfair_time + 1);
604 tsk_pfair(t)->sporadic_release = 0;
605 pfair_add_release(t);
606 check_preempt(t);
607
608 spin_unlock_irqrestore(&pfair_lock, flags);
609}
610
611static void pfair_task_wake_up(struct task_struct *t)
612{
613 unsigned long flags;
614 lt_t now;
615
616 TRACE_TASK(t, "wakes at %llu, release=%lu, pfair_time:%lu\n",
617 litmus_clock(), cur_release(t), pfair_time);
618
619 spin_lock_irqsave(&pfair_lock, flags);
620
621 /* It is a little unclear how to deal with Pfair
622 * tasks that block for a while and then wake. For now,
623 * if a task blocks and wakes before its next job release,
624 * then it may resume if it is currently linked somewhere
625 * (as if it never blocked at all). Otherwise, we have a
626 * new sporadic job release.
627 */
628 if (tsk_pfair(t)->sporadic_release) {
629 now = litmus_clock();
630 release_at(t, now);
631 prepare_release(t, time2quanta(now, CEIL));
632 sched_trace_task_release(t);
633 /* FIXME: race with pfair_time advancing */
634 pfair_add_release(t);
635 tsk_pfair(t)->sporadic_release = 0;
636 }
637
638 check_preempt(t);
639
640 spin_unlock_irqrestore(&pfair_lock, flags);
641 TRACE_TASK(t, "wake up done at %llu\n", litmus_clock());
642}
643
644static void pfair_task_block(struct task_struct *t)
645{
646 BUG_ON(!is_realtime(t));
647 TRACE_TASK(t, "blocks at %llu, state:%d\n",
648 litmus_clock(), t->state);
649}
650
651static void pfair_task_exit(struct task_struct * t)
652{
653 unsigned long flags;
654
655 BUG_ON(!is_realtime(t));
656
 657 /* Remove the task from the release or ready queue, and ensure
 658 * that it is not the scheduled task for ANY CPU. We
 659 * do this blanket check because occasionally when
660 * tasks exit while blocked, the task_cpu of the task
661 * might not be the same as the CPU that the PFAIR scheduler
662 * has chosen for it.
663 */
664 spin_lock_irqsave(&pfair_lock, flags);
665
666 TRACE_TASK(t, "RIP, state:%d\n", t->state);
667 drop_all_references(t);
668
669 spin_unlock_irqrestore(&pfair_lock, flags);
670
671 kfree(t->rt_param.pfair);
672 t->rt_param.pfair = NULL;
673}
674
675
676static void pfair_release_at(struct task_struct* task, lt_t start)
677{
678 unsigned long flags;
679 quanta_t release;
680
681 BUG_ON(!is_realtime(task));
682
683 spin_lock_irqsave(&pfair_lock, flags);
684 release_at(task, start);
685 release = time2quanta(start, CEIL);
686
687 if (release - pfair_time >= PFAIR_MAX_PERIOD)
688 release = pfair_time + PFAIR_MAX_PERIOD;
689
690 TRACE_TASK(task, "sys release at %lu\n", release);
691
692 drop_all_references(task);
693 prepare_release(task, release);
694 pfair_add_release(task);
695
696 /* Clear sporadic release flag, since this release subsumes any
697 * sporadic release on wake.
698 */
699 tsk_pfair(task)->sporadic_release = 0;
700
701 spin_unlock_irqrestore(&pfair_lock, flags);
702}
703
704static void init_subtask(struct subtask* sub, unsigned long i,
705 lt_t quanta, lt_t period)
706{
707 /* since i is zero-based, the formulas are shifted by one */
708 lt_t tmp;
709
710 /* release */
711 tmp = period * i;
712 do_div(tmp, quanta); /* floor */
713 sub->release = (quanta_t) tmp;
714
715 /* deadline */
716 tmp = period * (i + 1);
717 if (do_div(tmp, quanta)) /* ceil */
718 tmp++;
719 sub->deadline = (quanta_t) tmp;
720
721 /* next release */
722 tmp = period * (i + 1);
723 do_div(tmp, quanta); /* floor */
724 sub->overlap = sub->deadline - (quanta_t) tmp;
725
726 /* Group deadline.
727 * Based on the formula given in Uma's thesis.
728 */
729 if (2 * quanta >= period) {
730 /* heavy */
731 tmp = (sub->deadline - (i + 1)) * period;
732 if (period > quanta &&
733 do_div(tmp, (period - quanta))) /* ceil */
734 tmp++;
735 sub->group_deadline = (quanta_t) tmp;
736 } else
737 sub->group_deadline = 0;
738}
739
740static void dump_subtasks(struct task_struct* t)
741{
742 unsigned long i;
743 for (i = 0; i < t->rt_param.pfair->quanta; i++)
744 TRACE_TASK(t, "SUBTASK %lu: rel=%lu dl=%lu bbit:%lu gdl:%lu\n",
745 i + 1,
746 t->rt_param.pfair->subtasks[i].release,
747 t->rt_param.pfair->subtasks[i].deadline,
748 t->rt_param.pfair->subtasks[i].overlap,
749 t->rt_param.pfair->subtasks[i].group_deadline);
750}
751
752static long pfair_admit_task(struct task_struct* t)
753{
754 lt_t quanta;
755 lt_t period;
756 s64 quantum_length = ktime_to_ns(tick_period);
757 struct pfair_param* param;
758 unsigned long i;
759
760 /* Pfair is a tick-based method, so the time
761 * of interest is jiffies. Calculate tick-based
762 * times for everything.
763 * (Ceiling of exec cost, floor of period.)
764 */
765
766 quanta = get_exec_cost(t);
767 period = get_rt_period(t);
768
769 quanta = time2quanta(get_exec_cost(t), CEIL);
770
771 if (do_div(period, quantum_length))
772 printk(KERN_WARNING
773 "The period of %s/%d is not a multiple of %llu.\n",
774 t->comm, t->pid, (unsigned long long) quantum_length);
775
776 if (period >= PFAIR_MAX_PERIOD) {
777 printk(KERN_WARNING
778 "PFAIR: Rejecting task %s/%d; its period is too long.\n",
779 t->comm, t->pid);
780 return -EINVAL;
781 }
782
783 if (quanta == period) {
784 /* special case: task has weight 1.0 */
785 printk(KERN_INFO
786 "Admitting weight 1.0 task. (%s/%d, %llu, %llu).\n",
787 t->comm, t->pid, quanta, period);
788 quanta = 1;
789 period = 1;
790 }
791
792 param = kmalloc(sizeof(*param) +
793 quanta * sizeof(struct subtask), GFP_ATOMIC);
794
795 if (!param)
796 return -ENOMEM;
797
798 param->quanta = quanta;
799 param->cur = 0;
800 param->release = 0;
801 param->period = period;
802
803 for (i = 0; i < quanta; i++)
804 init_subtask(param->subtasks + i, i, quanta, period);
805
806 if (t->rt_param.pfair)
807 /* get rid of stale allocation */
808 kfree(t->rt_param.pfair);
809
810 t->rt_param.pfair = param;
811
812 /* spew out some debug info */
813 dump_subtasks(t);
814
815 return 0;
816}
817
818static long pfair_activate_plugin(void)
819{
820 int cpu;
821 struct pfair_state* state;
822
823 state = &__get_cpu_var(pfair_state);
824 pfair_time = current_quantum(state);
825
826 TRACE("Activating PFAIR at q=%lu\n", pfair_time);
827
828 for (cpu = 0; cpu < NR_CPUS; cpu++) {
829 state = &per_cpu(pfair_state, cpu);
830 state->cur_tick = pfair_time;
831 state->local_tick = pfair_time;
832 state->missed_quanta = 0;
833 state->offset = cpu_stagger_offset(cpu);
834 }
835
836 return 0;
837}
838
839/* Plugin object */
840static struct sched_plugin pfair_plugin __cacheline_aligned_in_smp = {
841 .plugin_name = "PFAIR",
842 .tick = pfair_tick,
843 .task_new = pfair_task_new,
844 .task_exit = pfair_task_exit,
845 .schedule = pfair_schedule,
846 .task_wake_up = pfair_task_wake_up,
847 .task_block = pfair_task_block,
848 .admit_task = pfair_admit_task,
849 .release_at = pfair_release_at,
850 .complete_job = complete_job,
851 .activate_plugin = pfair_activate_plugin,
852};
853
854static int __init init_pfair(void)
855{
856 int cpu, i;
857 struct pfair_state *state;
858
859 /* initialize release queue */
860 for (i = 0; i < PFAIR_MAX_PERIOD; i++)
861 heap_init(&release_queue[i]);
862
863 /* initialize CPU state */
864 for (cpu = 0; cpu < NR_CPUS; cpu++) {
865 state = &per_cpu(pfair_state, cpu);
866 state->cpu = cpu;
867 state->cur_tick = 0;
868 state->local_tick = 0;
869 state->linked = NULL;
870 state->local = NULL;
871 state->scheduled = NULL;
872 state->missed_quanta = 0;
873 state->offset = cpu_stagger_offset(cpu);
874 pstate[cpu] = state;
875 }
876
877 rt_domain_init(&pfair, pfair_ready_order, NULL, NULL);
878 return register_sched_plugin(&pfair_plugin);
879}
880
881module_init(init_pfair);
882
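
(Aside, not part of the commit: the subtask windows computed by init_subtask() above are the standard PD^2 parameters. The standalone userspace sketch below restates the same arithmetic; the helper name pd2_subtask_params, the stdio output, and plain 64-bit division in place of the kernel's do_div() are assumptions made for illustration only.)

/* Sketch: PD^2 subtask windows for one job, mirroring init_subtask(). */
#include <stdio.h>
#include <stdint.h>

struct pd2_subtask {
	uint64_t release;        /* floor(i * period / quanta)           */
	uint64_t deadline;       /* ceil((i + 1) * period / quanta)      */
	uint64_t overlap;        /* "b bit": deadline minus next release */
	uint64_t group_deadline; /* 0 for light tasks (weight <= 1/2)    */
};

static void pd2_subtask_params(struct pd2_subtask *s, uint64_t i,
                               uint64_t quanta, uint64_t period)
{
	s->release  = (period * i) / quanta;
	s->deadline = (period * (i + 1) + quanta - 1) / quanta; /* ceiling */
	s->overlap  = s->deadline - (period * (i + 1)) / quanta;
	if (2 * quanta >= period) {
		/* heavy task: ceil((deadline - (i+1)) * period / (period - quanta)) */
		uint64_t tmp = (s->deadline - (i + 1)) * period;
		if (period > quanta)
			tmp = (tmp + (period - quanta) - 1) / (period - quanta);
		s->group_deadline = tmp;
	} else {
		s->group_deadline = 0;
	}
}

int main(void)
{
	/* example: execution cost of 3 quanta every 5 quanta (weight 3/5) */
	struct pd2_subtask s;
	for (uint64_t i = 0; i < 3; i++) {
		pd2_subtask_params(&s, i, 3, 5);
		printf("subtask %llu: rel=%llu dl=%llu b=%llu gdl=%llu\n",
		       (unsigned long long)(i + 1),
		       (unsigned long long)s.release,
		       (unsigned long long)s.deadline,
		       (unsigned long long)s.overlap,
		       (unsigned long long)s.group_deadline);
	}
	return 0;
}

Run as-is, this prints the per-subtask release, deadline, b-bit, and group deadline that dump_subtasks() would trace for such a task (subtask 1: rel=0 dl=2 b=1 gdl=3, and so on).
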
diff --git a/litmus/sched_psn_edf.c b/litmus/sched_psn_edf.c
deleted file mode 100644
index 9a2bdfcba0..0000000000
--- a/litmus/sched_psn_edf.c
+++ /dev/null
@@ -1,454 +0,0 @@
1
2/*
3 * kernel/sched_psn_edf.c
4 *
5 * Implementation of the PSN-EDF scheduler plugin.
6 * Based on kern/sched_part_edf.c and kern/sched_gsn_edf.c.
7 *
8 * Suspensions and non-preemptable sections are supported.
9 * Priority inheritance is not supported.
10 */
11
12#include <linux/percpu.h>
13#include <linux/sched.h>
14#include <linux/list.h>
15#include <linux/spinlock.h>
16
17#include <linux/module.h>
18
19#include <litmus/litmus.h>
20#include <litmus/jobs.h>
21#include <litmus/sched_plugin.h>
22#include <litmus/edf_common.h>
23
24
25typedef struct {
26 rt_domain_t domain;
27 int cpu;
28 struct task_struct* scheduled; /* only RT tasks */
29
30/* scheduling lock
31 */
32#define slock domain.ready_lock
33/* protects the domain and
34 * serializes scheduling decisions
35 */
36} psnedf_domain_t;
37
38DEFINE_PER_CPU(psnedf_domain_t, psnedf_domains);
39
40#define local_edf (&__get_cpu_var(psnedf_domains).domain)
41#define local_pedf (&__get_cpu_var(psnedf_domains))
42#define remote_edf(cpu) (&per_cpu(psnedf_domains, cpu).domain)
43#define remote_pedf(cpu) (&per_cpu(psnedf_domains, cpu))
44#define task_edf(task) remote_edf(get_partition(task))
45#define task_pedf(task) remote_pedf(get_partition(task))
46
47
48static void psnedf_domain_init(psnedf_domain_t* pedf,
49 check_resched_needed_t check,
50 release_jobs_t release,
51 int cpu)
52{
53 edf_domain_init(&pedf->domain, check, release);
54 pedf->cpu = cpu;
55 pedf->scheduled = NULL;
56}
57
58static void requeue(struct task_struct* t, rt_domain_t *edf)
59{
60 if (t->state != TASK_RUNNING)
61 TRACE_TASK(t, "requeue: !TASK_RUNNING\n");
62
63 set_rt_flags(t, RT_F_RUNNING);
64 if (is_released(t, litmus_clock()))
65 __add_ready(edf, t);
66 else
67 add_release(edf, t); /* it has got to wait */
68}
69
70/* we assume the lock is being held */
71static void preempt(psnedf_domain_t *pedf)
72{
73 if (smp_processor_id() == pedf->cpu) {
74 if (pedf->scheduled && is_np(pedf->scheduled))
75 request_exit_np(pedf->scheduled);
76 else
77 set_tsk_need_resched(current);
78 } else
 79 /* in case it is a remote CPU, we have to defer the
 80 * decision to the remote CPU
81 */
82 smp_send_reschedule(pedf->cpu);
83}
84
 85 /* This check is trivial in partitioned systems as we only have to consider
86 * the CPU of the partition.
87 */
88static int psnedf_check_resched(rt_domain_t *edf)
89{
90 psnedf_domain_t *pedf = container_of(edf, psnedf_domain_t, domain);
91 int ret = 0;
92
93 /* because this is a callback from rt_domain_t we already hold
94 * the necessary lock for the ready queue
95 */
96 if (edf_preemption_needed(edf, pedf->scheduled)) {
97 preempt(pedf);
98 ret = 1;
99 }
100 return ret;
101}
102
103static void psnedf_tick(struct task_struct *t)
104{
105 psnedf_domain_t *pedf = local_pedf;
106
107 /* Check for inconsistency. We don't need the lock for this since
108 * ->scheduled is only changed in schedule, which obviously is not
109 * executing in parallel on this CPU
110 */
111 BUG_ON(is_realtime(t) && t != pedf->scheduled);
112
113 if (is_realtime(t) && budget_exhausted(t)) {
114 if (!is_np(t))
115 set_tsk_need_resched(t);
116 else {
117 TRACE("psnedf_scheduler_tick: "
118 "%d is non-preemptable, "
119 "preemption delayed.\n", t->pid);
120 request_exit_np(t);
121 }
122 }
123}
124
125static void job_completion(struct task_struct* t)
126{
127 TRACE_TASK(t, "job_completion().\n");
128 set_rt_flags(t, RT_F_SLEEP);
129 prepare_for_next_period(t);
130}
131
132static struct task_struct* psnedf_schedule(struct task_struct * prev)
133{
134 psnedf_domain_t* pedf = local_pedf;
135 rt_domain_t* edf = &pedf->domain;
136 struct task_struct* next;
137
138 int out_of_time, sleep, preempt,
139 np, exists, blocks, resched;
140
141 spin_lock(&pedf->slock);
142
143 /* sanity checking */
144 BUG_ON(pedf->scheduled && pedf->scheduled != prev);
145 BUG_ON(pedf->scheduled && !is_realtime(prev));
146
147 /* (0) Determine state */
148 exists = pedf->scheduled != NULL;
149 blocks = exists && !is_running(pedf->scheduled);
150 out_of_time = exists && budget_exhausted(pedf->scheduled);
151 np = exists && is_np(pedf->scheduled);
152 sleep = exists && get_rt_flags(pedf->scheduled) == RT_F_SLEEP;
153 preempt = edf_preemption_needed(edf, prev);
154
155 /* If we need to preempt do so.
156 * The following checks set resched to 1 in case of special
157 * circumstances.
158 */
159 resched = preempt;
160
161 /* If a task blocks we have no choice but to reschedule.
162 */
163 if (blocks)
164 resched = 1;
165
166 /* Request a sys_exit_np() call if we would like to preempt but cannot.
167 * Multiple calls to request_exit_np() don't hurt.
168 */
169 if (np && (out_of_time || preempt || sleep))
170 request_exit_np(pedf->scheduled);
171
172 /* Any task that is preemptable and either exhausts its execution
173 * budget or wants to sleep completes. We may have to reschedule after
174 * this.
175 */
176 if (!np && (out_of_time || sleep) && !blocks) {
177 job_completion(pedf->scheduled);
178 resched = 1;
179 }
180
181 /* The final scheduling decision. Do we need to switch for some reason?
182 * Switch if we are in RT mode and have no task or if we need to
183 * resched.
184 */
185 next = NULL;
186 if ((!np || blocks) && (resched || !exists)) {
187 /* Take care of a previously scheduled
188 * job by taking it out of the Linux runqueue.
189 */
190 if (pedf->scheduled && !blocks)
191 requeue(pedf->scheduled, edf);
192 next = __take_ready(edf);
193 } else
194 /* Only override Linux scheduler if we have a real-time task
195 * scheduled that needs to continue.
196 */
197 if (exists)
198 next = prev;
199
200 if (next) {
201 TRACE_TASK(next, " == next\n");
202 set_rt_flags(next, RT_F_RUNNING);
203 } else {
204 TRACE("becoming idle.\n");
205 }
206
207 pedf->scheduled = next;
208 spin_unlock(&pedf->slock);
209
210 return next;
211}
212
213
214/* Prepare a task for running in RT mode
215 */
216static void psnedf_task_new(struct task_struct * t, int on_rq, int running)
217{
218 rt_domain_t* edf = task_edf(t);
219 psnedf_domain_t* pedf = task_pedf(t);
220 unsigned long flags;
221
222 TRACE_TASK(t, "new\n");
223
224 /* setup job parameters */
225 release_at(t, litmus_clock());
226
227 /* The task should be running in the queue, otherwise signal
228 * code will try to wake it up with fatal consequences.
229 */
230 spin_lock_irqsave(&pedf->slock, flags);
231 if (running) {
232 /* there shouldn't be anything else running at the time */
233 BUG_ON(pedf->scheduled);
234 pedf->scheduled = t;
235 } else {
236 requeue(t, edf);
237 /* maybe we have to reschedule */
238 preempt(pedf);
239 }
240 spin_unlock_irqrestore(&pedf->slock, flags);
241}
242
243static void psnedf_task_wake_up(struct task_struct *task)
244{
245 unsigned long flags;
246 psnedf_domain_t* pedf = task_pedf(task);
247 rt_domain_t* edf = task_edf(task);
248 lt_t now;
249
250 TRACE_TASK(task, "wake up\n");
251 spin_lock_irqsave(&pedf->slock, flags);
252 BUG_ON(is_queued(task));
253 /* We need to take suspensions because of semaphores into
254 * account! If a job resumes after being suspended due to acquiring
255 * a semaphore, it should never be treated as a new job release.
256 *
257 * FIXME: This should be done in some more predictable and userspace-controlled way.
258 */
259 now = litmus_clock();
260 if (is_tardy(task, now) &&
261 get_rt_flags(task) != RT_F_EXIT_SEM) {
262 /* new sporadic release */
263 release_at(task, now);
264 sched_trace_task_release(task);
265 }
266 requeue(task, edf);
267 spin_unlock_irqrestore(&pedf->slock, flags);
268 TRACE_TASK(task, "wake up done\n");
269}
270
271static void psnedf_task_block(struct task_struct *t)
272{
273 /* only running tasks can block, thus t is in no queue */
274 TRACE_TASK(t, "block, state=%d\n", t->state);
275 BUG_ON(!is_realtime(t));
276 BUG_ON(is_queued(t));
277}
278
279static void psnedf_task_exit(struct task_struct * t)
280{
281 unsigned long flags;
282 psnedf_domain_t* pedf = task_pedf(t);
283 rt_domain_t* edf;
284
285 spin_lock_irqsave(&pedf->slock, flags);
286 if (is_queued(t)) {
287 /* dequeue */
288 edf = task_edf(t);
289 remove(edf, t);
290 }
291 if (pedf->scheduled == t)
292 pedf->scheduled = NULL;
293 preempt(pedf);
294 spin_unlock_irqrestore(&pedf->slock, flags);
295}
296
297#ifdef CONFIG_FMLP
298static long psnedf_pi_block(struct pi_semaphore *sem,
299 struct task_struct *new_waiter)
300{
301 psnedf_domain_t* pedf;
302 rt_domain_t* edf;
303 struct task_struct* t;
304 int cpu = get_partition(new_waiter);
305
306 BUG_ON(!new_waiter);
307
308 if (edf_higher_prio(new_waiter, sem->hp.cpu_task[cpu])) {
309 TRACE_TASK(new_waiter, " boosts priority\n");
310 pedf = task_pedf(new_waiter);
311 edf = task_edf(new_waiter);
312
313 /* interrupts already disabled */
314 spin_lock(&pedf->slock);
315
316 /* store new highest-priority task */
317 sem->hp.cpu_task[cpu] = new_waiter;
318 if (sem->holder &&
319 get_partition(sem->holder) == get_partition(new_waiter)) {
320 /* let holder inherit */
321 sem->holder->rt_param.inh_task = new_waiter;
322 t = sem->holder;
323 if (is_queued(t)) {
324 /* queued in domain*/
325 remove(edf, t);
326 /* readd to make priority change take place */
327 /* FIXME: this looks outdated */
328 if (is_released(t, litmus_clock()))
329 __add_ready(edf, t);
330 else
331 add_release(edf, t);
332 }
333 }
334
335 /* check if we need to reschedule */
336 if (edf_preemption_needed(edf, current))
337 preempt(pedf);
338
339 spin_unlock(&pedf->slock);
340 }
341
342 return 0;
343}
344
345static long psnedf_inherit_priority(struct pi_semaphore *sem,
346 struct task_struct *new_owner)
347{
348 int cpu = get_partition(new_owner);
349
350 new_owner->rt_param.inh_task = sem->hp.cpu_task[cpu];
351 if (sem->hp.cpu_task[cpu] && new_owner != sem->hp.cpu_task[cpu]) {
352 TRACE_TASK(new_owner,
353 "inherited priority from %s/%d\n",
354 sem->hp.cpu_task[cpu]->comm,
355 sem->hp.cpu_task[cpu]->pid);
356 } else
357 TRACE_TASK(new_owner,
358 "cannot inherit priority: "
359 "no higher priority job waits on this CPU!\n");
360 /* make new owner non-preemptable as required by FMLP under
361 * PSN-EDF.
362 */
363 make_np(new_owner);
364 return 0;
365}
366
367
368/* This function is called on a semaphore release, and assumes that
369 * the current task is also the semaphore holder.
370 */
371static long psnedf_return_priority(struct pi_semaphore *sem)
372{
373 struct task_struct* t = current;
374 psnedf_domain_t* pedf = task_pedf(t);
375 rt_domain_t* edf = task_edf(t);
376 int ret = 0;
377 int cpu = get_partition(current);
378
379
380 /* Find new highest-priority semaphore task
381 * if holder task is the current hp.cpu_task[cpu].
382 *
383 * Calling function holds sem->wait.lock.
384 */
385 if (t == sem->hp.cpu_task[cpu])
386 edf_set_hp_cpu_task(sem, cpu);
387
388 take_np(t);
389 if (current->rt_param.inh_task) {
390 TRACE_CUR("return priority of %s/%d\n",
391 current->rt_param.inh_task->comm,
392 current->rt_param.inh_task->pid);
393 spin_lock(&pedf->slock);
394
395 /* Reset inh_task to NULL. */
396 current->rt_param.inh_task = NULL;
397
398 /* check if we need to reschedule */
399 if (edf_preemption_needed(edf, current))
400 preempt(pedf);
401
402 spin_unlock(&pedf->slock);
403 } else
404 TRACE_CUR(" no priority to return %p\n", sem);
405
406 return ret;
407}
408
409#endif
410
411static long psnedf_admit_task(struct task_struct* tsk)
412{
413 return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL;
414}
415
416/* Plugin object */
417static struct sched_plugin psn_edf_plugin __cacheline_aligned_in_smp = {
418 .plugin_name = "PSN-EDF",
419#ifdef CONFIG_SRP
420 .srp_active = 1,
421#endif
422 .tick = psnedf_tick,
423 .task_new = psnedf_task_new,
424 .complete_job = complete_job,
425 .task_exit = psnedf_task_exit,
426 .schedule = psnedf_schedule,
427 .task_wake_up = psnedf_task_wake_up,
428 .task_block = psnedf_task_block,
429#ifdef CONFIG_FMLP
430 .fmlp_active = 1,
431 .pi_block = psnedf_pi_block,
432 .inherit_priority = psnedf_inherit_priority,
433 .return_priority = psnedf_return_priority,
434#endif
435 .admit_task = psnedf_admit_task
436};
437
438
439static int __init init_psn_edf(void)
440{
441 int i;
442
443 for (i = 0; i < NR_CPUS; i++)
444 {
445 psnedf_domain_init(remote_pedf(i),
446 psnedf_check_resched,
447 NULL, i);
448 }
449 return register_sched_plugin(&psn_edf_plugin);
450}
451
452
453
454module_init(init_psn_edf);
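
(Aside, not part of the commit: the core of psnedf_schedule() above reduces to a small predicate over the per-CPU state flags. The sketch below restates that decision; the function name psnedf_needs_resched and the bool-based interface are assumptions for illustration, not the plugin's API.)

/* Sketch: does PSN-EDF need to pick a new task on this CPU?
 * Restates the resched logic of psnedf_schedule() with plain flags.
 */
#include <stdbool.h>
#include <stdio.h>

static bool psnedf_needs_resched(bool exists, bool blocks, bool np,
                                 bool out_of_time, bool sleep, bool preempt)
{
	bool resched = preempt;            /* a higher-priority job is ready */

	if (blocks)                        /* the scheduled job suspended    */
		resched = true;
	if (!np && (out_of_time || sleep) && !blocks)
		resched = true;            /* job completed: budget exhausted or sleep */

	/* a non-preemptive section defers the switch unless the job blocked */
	return (!np || blocks) && (resched || !exists);
}

int main(void)
{
	/* example: a preemptable job that exhausted its budget must be replaced */
	printf("%d\n", psnedf_needs_resched(true, false, false, true, false, false));
	return 0;
}

When the predicate is false, the scheduler keeps prev (the "only override Linux scheduler" branch); when it is true, a non-blocked prev is requeued and the head of the partition's EDF ready queue is taken instead.
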