path: root/litmus/sched_mc.c
author    Jonathan Herman <hermanjl@cs.unc.edu>    2013-01-22 15:30:43 -0500
committer Jonathan Herman <hermanjl@cs.unc.edu>    2013-01-22 15:30:43 -0500
commit    7806057274c493d53a214232d4df6f96aadc7547 (patch)
tree      b3aa02ee438a0c5f592e125f4657a2f1e5c225f5 /litmus/sched_mc.c
parent    a5d1599facc1b934e0b8d68e360dadd66c1df730 (diff)
Merge with branch wip-mc.
Diffstat (limited to 'litmus/sched_mc.c')
-rw-r--r--    litmus/sched_mc.c    1803
1 file changed, 1803 insertions, 0 deletions
diff --git a/litmus/sched_mc.c b/litmus/sched_mc.c
new file mode 100644
index 00000000000..64de4ef9c46
--- /dev/null
+++ b/litmus/sched_mc.c
@@ -0,0 +1,1803 @@
1/*
2 * litmus/sched_mc.c
3 * Implementation of the Mixed Criticality scheduling algorithm.
4 *
5 * (Per Mollison, Erickson, Anderson, Baruah, Scoredos 2010)
6 * TODO: optimize reschedule
7 */
8#include <linux/spinlock.h>
9#include <linux/percpu.h>
10#include <linux/sched.h>
11#include <linux/hrtimer.h>
12#include <linux/slab.h>
13#include <linux/module.h>
14#include <linux/poison.h>
15#include <linux/pid.h>
16
17#include <litmus/litmus.h>
18#include <litmus/trace.h>
19#include <litmus/jobs.h>
20#include <litmus/sched_plugin.h>
21#include <litmus/edf_common.h>
22#include <litmus/sched_trace.h>
23#include <litmus/domain.h>
24#include <litmus/bheap.h>
25#include <litmus/event_group.h>
26#include <litmus/budget.h>
27#include <litmus/server.h>
28#include <litmus/sched_mc.h>
29#include <litmus/ce_domain.h>
30#include <litmus/dgl.h>
31#include <litmus/color.h>
32#include <litmus/way_tracker.h>
33
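/*
 * Per-CPU scheduling signals, consumed by process_signals() from
 * mc_schedule():
 *   update  - the CPU's DGL group-lock state changed; see cpu_update()
 *   preempt - a partitioned domain on this CPU has a higher-priority ready
 *             task; see check_partitioned_preempt()
 */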
34struct mc_signal {
 35 unsigned int update:1;
 36 unsigned int preempt:1;
37};
38
39struct cpu_entry {
40 int cpu;
41 struct crit_entry crit_entries[NUM_CRIT_LEVELS];
42
43 struct task_struct* scheduled;
44 struct task_struct* will_schedule;
45 struct task_struct* linked;
46
47 struct mc_signal signal;
48
49 raw_spinlock_t lock;
50 raw_spinlock_t signal_lock;
51
52#ifdef CONFIG_PLUGIN_MC_REDIRECT
53 struct list_head redir;
54 raw_spinlock_t redir_lock;
55#endif
56#ifdef CONFIG_MERGE_TIMERS
57 struct event_group *event_group;
58#endif
59};
60
61static struct dgl group_lock;
62static raw_spinlock_t dgl_lock;
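/*
 * Lock ordering used in this file: when both are needed, a domain lock is
 * taken before a cpu_entry lock (see pick_next_task()), and dgl_lock nests
 * inside the cpu_entry lock; signal_lock is a leaf lock for mc_signal.
 */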
63
64DEFINE_PER_CPU(struct cpu_entry, cpus);
65static int interrupt_cpu;
66#define has_resources(t, c) (tsk_rt(t)->req == group_lock.acquired[c])
67
68#define domain_data(dom) (container_of(dom, struct domain_data, domain))
69#define is_global(dom) (domain_data(dom)->heap)
70#define is_global_task(t) (is_global(get_task_domain(t)))
71#define can_requeue(t) \
72 ((t)->rt_param.linked_on == NO_CPU && /* Not linked anywhere */ \
73 !is_queued(t) && /* Not gonna be linked */ \
74 (!is_global_task(t) || (t)->rt_param.scheduled_on == NO_CPU))
75#define entry_level(e) \
76 (((e)->linked) ? tsk_mc_crit((e)->linked) : NUM_CRIT_LEVELS - 1)
77#define get_crit_entry_for(cpu, level) (&per_cpu(cpus, cpu).crit_entries[level])
78#define crit_cpu(ce) \
79 (container_of((void*)((ce) - (ce)->level), struct cpu_entry, crit_entries))
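/*
 * crit_cpu(ce) relies on a crit_entry's index in its cpu_entry's
 * crit_entries[] array being equal to its criticality level: subtracting
 * ce->level from ce yields crit_entries[0], and container_of() then recovers
 * the enclosing cpu_entry.
 */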
80
81static void clear_signal(struct mc_signal *signal)
82{
83 signal->update = signal->preempt = 0;
84}
85
86/*
87 * Put in requests for resources needed by @t.
88 */
89static int acquire_resources(struct task_struct *t)
90{
91 int cpu, acquired;
92 struct server *task_server;
93 struct cpu_entry *entry;
94
95 if (!lock_cache)
96 return 1;
97
98 BUG_ON(tsk_rt(t)->linked_on == NO_CPU);
99
100
101 raw_spin_lock(&dgl_lock);
102
103 cpu = tsk_rt(t)->linked_on;
104 task_server = &tsk_rt(t)->server;
105
 106 if (!cache_preempt && is_kernel_np(t)) {
 107 TRACE_MC_TASK(t, "Already contending for resources\n");
 108 acquired = has_resources(t, cpu);
 109 goto out; /* do not return with dgl_lock held */
 110 }
111 if (!has_resources(t, cpu)) {
112 sched_trace_task_block(t);
113 server_state_change(task_server, SS_BLOCKED, 0);
114 TRACE_MC_TASK(t, "Blocked at %llu\n", litmus_clock());
115
116 add_group_req(&group_lock, tsk_rt(t)->req, cpu);
117 if (!cache_preempt)
118 make_np(t);
119 }
120
121 acquired = has_resources(t, cpu);
122
123 if (acquired) {
124 entry = &per_cpu(cpus, cpu);
125 entry->signal.update = 0;
126 }
 127out:
 128 raw_spin_unlock(&dgl_lock);
 129
 130 return acquired;
131}
132
133static void release_resources(struct task_struct *t)
134{
135 struct server *task_server = &tsk_rt(t)->server;
136
137 if (!lock_cache)
138 return;
139
140 raw_spin_lock(&dgl_lock);
141
142 server_state_change(task_server, SS_REMOVED, 0);
143
144 if (cache_preempt || is_kernel_np(t)) {
145 TRACE_MC_TASK(t, "Releasing resources\n");
146
147 remove_group_req(&group_lock, tsk_rt(t)->req);
148 take_np(t);
149 } else if (!cache_preempt) {
150 TRACE_MC_TASK(t, "No resources to release!\n");
151 }
152
153 raw_spin_unlock(&dgl_lock);
154
155}
156
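/*
 * dumb_acquire()/dumb_release() - trivial resource hooks for domains that do
 * not take the cache group lock. init_mc() installs them for the level-A and
 * level-C domains; level B uses acquire_resources()/release_resources().
 */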
157static int dumb_acquire(struct task_struct *t)
158{
159 struct server *server = &tsk_rt(t)->server;
160 server_state_change(server, SS_ACTIVE, 0);
161 return 1;
162}
163
164static void dumb_release(struct task_struct *t)
165{
166 struct server *server = &tsk_rt(t)->server;
167 server_state_change(server, SS_REMOVED, 0);
168}
169
170#define fully_removed(s) ((s)->state == SS_REMOVED && !(s)->in_transit)
171
172/*
173 * Sort CPUs within a global domain's heap.
174 */
175static int cpu_lower_prio(struct bheap_node *a, struct bheap_node *b)
176{
177 struct domain *domain;
178 struct crit_entry *first, *second;
179 struct task_struct *first_link, *second_link;
180
181 first = a->value;
182 second = b->value;
183 first_link = first->server.linked;
184 second_link = second->server.linked;
185
186 if (fully_removed(&first->server) || fully_removed(&second->server)){
187 /* Removed entries go at the back of the heap */
188 return fully_removed(&second->server) &&
189 !fully_removed(&first->server);
190 } else if (!first_link || !second_link) {
191 /* Entry with nothing scheduled is lowest priority (front) */
192 return second_link && !first_link;
193 } else {
194 /* Sort by deadlines of tasks (later deadlines first) */
195 domain = get_task_domain(first_link);
196 return domain->higher_prio(second_link, first_link);
197 }
198}
199
200/*
201 * Return true if the domain has a higher priority ready task. The @curr
202 * task must belong to the domain.
203 */
204static int mc_preempt_needed(struct domain *dom, struct task_struct* curr)
205{
206 struct task_struct *next = dom->peek_ready(dom);
207 if (!next || !curr) {
208 return next && !curr;
209 } else {
210 BUG_ON(tsk_mc_crit(next) != tsk_mc_crit(curr));
211 return !is_np(curr) &&
212 get_task_domain(next)->higher_prio(next, curr);
213 }
214}
215
216/*
217 * Update crit entry position in a global heap. Caller must hold
218 * @ce's domain lock.
219 */
220static void update_crit_position(struct crit_entry *ce)
221{
222 struct bheap *heap;
223 if (is_global(ce->domain)) {
224 heap = domain_data(ce->domain)->heap;
225 BUG_ON(!heap);
226 BUG_ON(!bheap_node_in_heap(ce->node));
227 bheap_delete(cpu_lower_prio, heap, ce->node);
228 bheap_insert(cpu_lower_prio, heap, ce->node);
229 }
230}
231
232/*
233 * Update crit entry position in a global heap if it has been marked
234 * for update. Caller must hold @ce's domain lock.
235 */
236static void fix_crit_position(struct crit_entry *ce)
237{
238 struct server *server = &ce->server;
239 if (is_global(ce->domain) && server->in_transit) {
240 server_state_change(server, server->state, 0);
241 update_crit_position(ce);
242 }
243}
244
245/*
 246 * Return the next CPU which should be preempted, or NULL if the domain has no
247 * preemptable CPUs. Caller must hold the @dom lock.
248 */
249static struct crit_entry* lowest_prio_cpu(struct domain *dom)
250{
251 struct bheap *heap = domain_data(dom)->heap;
252 struct bheap_node* hn;
253 struct crit_entry *ce, *res = NULL;
254
255 do {
256 hn = bheap_peek(cpu_lower_prio, heap);
257 ce = (hn) ? hn->value : NULL;
258 if (ce) {
259 if (ce->server.in_transit)
260 fix_crit_position(ce);
261 else if (ce->server.state == SS_ACTIVE)
262 res = ce;
263 else if (ce->server.state == SS_REMOVED)
264 ce = NULL;
265 }
266 } while (ce && !res);
267
268 return res;
269}
270
271/*
272 * Time accounting for ghost tasks.
273 * Must be called before a decision is made involving the task's budget.
274 */
275static void update_server_time(struct task_struct *p)
276{
277 u64 clock = litmus_clock();
278 u64 delta = clock - p->rt_param.last_exec_time;
279 if (unlikely ((s64)delta < 0)) {
280 delta = 0;
281 }
282 if (budget_remaining(p) <= delta) {
283 tsk_rt(p)->job_params.exec_time = get_exec_cost(p);
284 } else {
285 tsk_rt(p)->job_params.exec_time += delta;
286 }
287 p->rt_param.last_exec_time = clock;
288}
289
290/*
291 * Arm ghost timer. Will merge timers if the option is specified.
292 */
293static void start_crit(struct crit_entry *ce)
294{
295 lt_t fire;
296 struct task_struct *task;
297 struct server *task_server;
298
299 BUG_ON(ce->server.state != SS_ACTIVE);
300
301 task = ce->server.linked;
302 task_server = &tsk_rt(task)->server;
303
304 if (is_ghost(task) && CRIT_LEVEL_A != tsk_mc_crit(task)) {
305 /* There is a level-A timer that will force a
306 * preemption, so we don't set this for level-A
307 * tasks. Otherwise reset the budget timer
308 */
309 fire = litmus_clock() + budget_remaining(task);
310
311#ifdef CONFIG_MERGE_TIMERS
312 add_event(crit_cpu(ce)->event_group, &ce->event, fire);
313#else
314 __hrtimer_start_range_ns(&ce->timer,
315 ns_to_ktime(fire),
316 0 /* delta */,
317 HRTIMER_MODE_ABS_PINNED,
318 0 /* no wakeup */);
319#endif
320 }
321
322 server_state_change(task_server, SS_ACTIVE, 0);
323}
324
325static void stop_crit(struct crit_entry *ce)
326{
327 struct server *task_server = &tsk_rt(ce->server.linked)->server;
328
329 if (is_ghost(ce->server.linked)) {
330 if (!budget_exhausted(ce->server.linked)) {
331 /* Job isn't finished, so do accounting */
332 update_server_time(ce->server.linked);
333 }
334#ifdef CONFIG_MERGE_TIMERS
335 cancel_event(&ce->event);
336#else
337 hrtimer_try_to_cancel(&ce->timer);
338#endif
339 }
340
341 if (task_server->state != SS_BLOCKED) {
342 server_state_change(task_server, SS_REMOVED, 0);
343 }
344}
345
346/**
347 * link_task_to_crit() - Logically run a task at a criticality level.
348 * Caller must hold @ce's CPU lock.
349 */
350static void link_task_to_crit(struct crit_entry *ce,
351 struct task_struct *task)
352{
353 struct server *ce_server = &ce->server;
354
355 TRACE_CRIT_ENTRY(ce, "Linking " TS "\n", TA(task));
356 BUG_ON(task && ce_server->state != SS_ACTIVE);
357 BUG_ON(task && tsk_rt(task)->linked_on != NO_CPU);
358 BUG_ON(task && is_global(ce->domain) &&
359 !bheap_node_in_heap(ce->node));
360
361 /* Unlink last task */
362 if (ce->server.linked) {
363 ce->domain->release_resources(ce->server.linked);
364 if (ce_server->state == SS_BLOCKED) {
365 server_state_change(ce_server, SS_ACTIVE, 0);
366 }
367
368 TRACE_MC_TASK(ce->server.linked, "Unlinking\n");
369
370 stop_crit(ce);
371 tsk_rt(ce->server.linked)->server.parent = 0;
372 tsk_rt(ce->server.linked)->server.cpu = NO_CPU;
373 ce->server.linked->rt_param.linked_on = NO_CPU;
374 }
375
376 /* Actually link task */
377 ce->server.linked = task;
378 if (task) {
379 /* Block if task cannot acquire resources */
380 task->rt_param.linked_on = crit_cpu(ce)->cpu;
381 tsk_rt(task)->server.parent = ce_sid(ce);
382 tsk_rt(ce->server.linked)->server.cpu = crit_cpu(ce)->cpu;
383
384 if (ce->domain->acquire_resources(task)) {
385 start_crit(ce);
386 } else {
387 server_state_change(ce_server, SS_BLOCKED, 0);
388 }
389 }
390}
391
392static void check_for_preempt(struct domain*);
393
394/**
395 * job_arrival() - Called when a task re-enters the system.
396 * Caller must hold no locks.
397 */
398static void job_arrival(struct task_struct *task)
399{
 400 struct domain *dom;
 401
 402 BUG_ON(!task);
 403 TRACE_MC_TASK(task, "Job arriving\n");
 404 dom = get_task_domain(task);
405 raw_spin_lock(dom->lock);
406 if (can_requeue(task)) {
407 BUG_ON(task->rt_param.linked_on != NO_CPU);
408 dom->requeue(dom, task);
409 check_for_preempt(dom);
410 } else {
411 /* If a global task is scheduled on one cpu, it CANNOT
412 * be requeued into a global domain. Another cpu might
413 * dequeue the global task before it is descheduled,
414 * causing the system to crash when the task is scheduled
415 * in two places simultaneously.
416 */
417 TRACE_MC_TASK(task, "Delayed arrival of scheduled task, "
418 "linked: %d, sched: %d, queued: %d\n",
419 tsk_rt(task)->linked_on, tsk_rt(task)->scheduled_on,
420 is_queued(task));
421 }
422 raw_spin_unlock(dom->lock);
423}
424
425/**
 426 * low_prio_arrival() - If CONFIG_PLUGIN_MC_REDIRECT is enabled, redirect
 427 * lower-priority job_arrival work to the interrupt_cpu.
428 */
429static void low_prio_arrival(struct task_struct *task)
430{
431 struct cpu_entry *entry;
432
 433 /* A racing CPU may have already requeued or linked this task */
434 if (!can_requeue(task)) return;
435
436#ifdef CONFIG_PLUGIN_MC_REDIRECT
437 if (!is_global_task(task))
438 goto arrive;
439 if (smp_processor_id() != interrupt_cpu) {
440 entry = &__get_cpu_var(cpus);
441 raw_spin_lock(&entry->redir_lock);
442 TRACE_MC_TASK(task, "Adding to redirect queue\n");
443 list_add(&tsk_rt(task)->list, &entry->redir);
444 raw_spin_unlock(&entry->redir_lock);
445 litmus_reschedule(interrupt_cpu);
446 } else
447#endif
448 {
449arrive:
450 TRACE_MC_TASK(task, "On interrupt master, requeueing task\n");
451 job_arrival(task);
452 }
453}
454
455#ifdef CONFIG_PLUGIN_MC_REDIRECT
456/**
457 * fix_global_levels() - Execute redirected job arrivals on this cpu.
458 */
459static void fix_global_levels(void)
460{
461 int c;
462 struct cpu_entry *e;
463 struct list_head *pos, *safe;
464 struct task_struct *t;
465
466 STRACE("Fixing global levels\n");
467 for_each_online_cpu(c) {
468 e = &per_cpu(cpus, c);
469 raw_spin_lock(&e->redir_lock);
470 list_for_each_safe(pos, safe, &e->redir) {
471 t = list_entry(pos, struct task_struct, rt_param.list);
472 BUG_ON(!t);
473 TRACE_MC_TASK(t, "Dequeued redirected job\n");
474 list_del_init(pos);
475 job_arrival(t);
476 }
477 raw_spin_unlock(&e->redir_lock);
478 }
479}
480#endif
481
482/**
483 * link_task_to_cpu() - Logically run a task on a CPU.
484 * The task must first have been linked to one of the CPU's crit_entries.
485 * Caller must hold the entry lock.
486 */
487static void link_task_to_cpu(struct cpu_entry *entry, struct task_struct *task)
488{
489 int i = entry_level(entry);
490 struct crit_entry *ce;
491 struct server *server;
492
493 TRACE_MC_TASK(task, "Linking to P%d\n", entry->cpu);
494 BUG_ON(task && tsk_rt(task)->linked_on != entry->cpu);
495 BUG_ON(task && is_ghost(task));
496
497 if (entry->linked) {
498 server = &tsk_rt(entry->linked)->server;
499 sched_trace_server_switch_away(server->sid, *server->job,
500 entry->linked->pid,
501 get_user_job(entry->linked),
502 entry->cpu);
503 }
504
505 if (task) {
506 server = &tsk_rt(task)->server;
507 sched_trace_server_switch_to(server->sid, *server->job,
508 task->pid,
509 get_user_job(task),
510 entry->cpu);
511 }
512
513 entry->linked = task;
514
515 /* Higher criticality crit entries are now usable */
516 for (; i < entry_level(entry) + 1; i++) {
517 ce = &entry->crit_entries[i];
518 server = &ce->server;
519
520 if (server->state == SS_REMOVED) {
521 TRACE_CRIT_ENTRY(ce, "Moving up to active\n");
522 server_state_change(server, SS_ACTIVE, 1);
523 }
524 }
525}
526
527static void preempt_cpu(struct cpu_entry *entry, struct task_struct *t)
528{
529 link_task_to_cpu(entry, t);
530 litmus_reschedule(entry->cpu);
531}
532
533/**
534 * preempt_crit() - Preempt a logically running task with a higher priority one.
 535 * @dom: Domain from which to draw higher priority task
 536 * @ce: CPU criticality level to preempt
 537 * Return: Preempted task
538 *
539 * Caller must hold the lock for @dom and @ce's CPU lock.
540 */
541static struct task_struct* preempt_crit(struct domain *dom, struct crit_entry *ce)
542{
543 struct task_struct *task = dom->take_ready(dom);
544 struct cpu_entry *entry = crit_cpu(ce);
545 struct task_struct *old = ce->server.linked;
546
547 BUG_ON(!task);
548 TRACE_CRIT_ENTRY(ce, "Preempted by " TS "\n", TA(task));
549
550 /* Per-domain preemption */
551 link_task_to_crit(ce, task);
552 /* if (old && can_requeue(old)) { */
553 /* dom->requeue(dom, old); */
554 /* } */
555 update_crit_position(ce);
556
557 /* Preempt actual execution if this is a running task.
558 * We know that our task is higher priority than what is currently
559 * running on this CPU as otherwise the crit_entry would have
560 * been disabled and a preemption could not have occurred
561 */
562 if (!is_ghost(task) && SS_BLOCKED != ce->server.state) {
563 preempt_cpu(entry, task);
564 } else if (old && old == entry->linked) {
565 /* Preempted running task with ghost job. Nothing should run */
566 preempt_cpu(entry, NULL);
567 }
568
569 return old;
570}
571
572/**
573 * update_crit_levels() - Update criticality entries for the new cpu state.
574 * This should be called after a new task has been linked to @entry.
575 * The caller must hold the @entry->lock, but this method will release it.
576 */
577static void update_crit_levels(struct cpu_entry *entry)
578{
579 int i, global_preempted;
580 struct server *server;
581 struct crit_entry *ce;
582 struct task_struct *readmit[NUM_CRIT_LEVELS];
583 enum crit_level level = entry_level(entry);
584
585 /* Remove lower priority tasks from the entry */
586 for (i = level + 1; i < NUM_CRIT_LEVELS; i++) {
587 ce = &entry->crit_entries[i];
588 server = &ce->server;
589
590 global_preempted = ce->server.linked &&
591 /* This task is running on a cpu */
592 ce->server.linked->rt_param.scheduled_on == entry->cpu &&
593 /* But it was preempted */
594 ce->server.linked != entry->linked &&
595 /* And it is an eligible global task */
596 !is_ghost(ce->server.linked) && is_global(ce->domain);
597
598 /* Do not readmit global tasks which are preempted! These can't
599 * ever be re-admitted until they are descheduled for reasons
600 * explained in job_arrival.
601 */
602 readmit[i] = (!global_preempted) ? ce->server.linked : NULL;
603
604 if (server->state != SS_REMOVED) {
605 if (ce->server.linked) {
606 link_task_to_crit(ce, NULL);
607 }
608 TRACE_CRIT_ENTRY(ce, "Removing lower crit\n");
609 server_state_change(server, SS_REMOVED, 1);
610
611 }
612 }
613 /* Need to unlock so we can access domains */
614 raw_spin_unlock(&entry->lock);
615
616 /* Re-admit tasks to the system */
617 for (i = level + 1; i < NUM_CRIT_LEVELS; i++) {
618 ce = &entry->crit_entries[i];
619 if (readmit[i]) {
620 low_prio_arrival(readmit[i]);
621 }
622 }
623}
624
625/*
 626 * Assumes a single, lowest-priority global criticality level. This avoids
627 * unnecessary calls to update_crit_levels.
628 */
629static void check_global_preempt(struct domain *dom)
630{
631 int recheck;
632 struct crit_entry *ce;
633 struct cpu_entry *entry;
634 struct task_struct *preempted;
635
636 recheck = 1;
637
638 /* Loop until we find a non-preemptable CPU */
639 while (recheck && (ce = lowest_prio_cpu(dom))) {
640 entry = crit_cpu(ce);
641 recheck = 1;
642 preempted = NULL;
643
644 /* Cache next task */
645 dom->peek_ready(dom);
646
647 raw_spin_lock(&entry->lock);
648
649 if (ce->server.in_transit) {
650 /* CPU disabled while locking! */
651 fix_crit_position(ce);
652 } else if (mc_preempt_needed(dom, ce->server.linked)) {
653 /* Success! Check for more preemptions */
654 preempted = preempt_crit(dom, ce);
655 } else {
656 /* Failure! */
657 recheck = 0;
658 }
659
660 raw_spin_unlock(&entry->lock);
661
662 /* Only add preempted task after lock has been released */
663 if (preempted && can_requeue(preempted)) {
664 dom->requeue(dom, preempted);
665 }
666 }
667}
668
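/*
 * Rather than linking the higher-priority task directly, a partitioned
 * preemption is deferred to the owning CPU: the preempt signal is set and
 * that CPU is rescheduled so that the switch happens in its own
 * mc_schedule() path (see process_signals()).
 */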
669static void check_partitioned_preempt(struct domain *dom)
670{
671 struct cpu_entry *entry;
672 struct crit_entry *ce;
673
674 ce = domain_data(dom)->crit_entry;
675 entry = crit_cpu(ce);
676
677 if (ce->server.state == SS_REMOVED ||
678 !mc_preempt_needed(dom, ce->server.linked)) {
679 return;
680 }
681
682 entry->signal.preempt = 1;
683 litmus_reschedule(entry->cpu);
684}
685
686/**
687 * check_for_preempt() - Causes a preemption if higher-priority tasks are ready.
688 * Caller must hold domain lock.
689 */
690static void check_for_preempt(struct domain *dom)
691{
692 struct crit_entry *ce;
693 struct cpu_entry *entry;
694
695 if (is_global(dom)) {
696 check_global_preempt(dom);
697 } else {
698 ce = domain_data(dom)->crit_entry;
699 entry = crit_cpu(ce);
700
701 /* Cache next task */
702 dom->peek_ready(dom);
703
704 raw_spin_lock(&entry->lock);
705 check_partitioned_preempt(dom);
706 raw_spin_unlock(&entry->lock);
707 }
708}
709
710/**
711 * remove_from_all() - Logically remove a task from all structures.
712 * Caller must hold no locks.
713 */
714static void remove_from_all(struct task_struct* task)
715{
716 int update = 0;
717 struct cpu_entry *entry;
718 struct crit_entry *ce;
 719 struct domain *dom;
 720
 721 BUG_ON(!task);
 722 TRACE_MC_TASK(task, "Removing from everything\n");
 723 dom = get_task_domain(task);
724 raw_spin_lock(dom->lock);
725
726 /* Remove the task from any CPU state */
727 if (task->rt_param.linked_on != NO_CPU) {
728 TRACE_MC_TASK(task, "Linked to something\n");
729 entry = &per_cpu(cpus, task->rt_param.linked_on);
730 raw_spin_lock(&entry->lock);
731
732 /* Unlink only if task is still linked post lock */
733 ce = &entry->crit_entries[tsk_mc_crit(task)];
734 if (task->rt_param.linked_on != NO_CPU) {
735 BUG_ON(ce->server.linked != task);
736 if (entry->linked == task) {
737 update = 1;
738 link_task_to_cpu(entry, NULL);
739 }
740 link_task_to_crit(ce, NULL);
741 update_crit_position(ce);
742 } else {
743 TRACE_MC_TASK(task, "Unlinked before we got lock!\n");
744 }
745 raw_spin_unlock(&entry->lock);
746 } else {
747 TRACE_MC_TASK(task, "Not linked to anything\n");
748 }
749
750 /* Ensure the task isn't returned by its domain */
751 dom->remove(dom, task);
752
753 raw_spin_unlock(dom->lock);
754}
755
756/**
757 * job_completion() - Update task state and re-enter it into the system.
758 * Converts tasks which have completed their execution early into ghost jobs.
759 * Caller must hold no locks.
760 */
761static void job_completion(struct task_struct *task, int forced)
762{
763 int release_server;
764 struct cpu_entry *entry;
765 struct crit_entry *ce;
766
767 TRACE_MC_TASK(task, "Completed\n");
768
769 if (!forced) {
770 /* Userspace signaled job completion */
771 sched_trace_task_completion(current, 0);
772 mb();
773 setup_user_release(current, get_user_deadline(current));
774 }
775
776#ifndef CONFIG_PLUGIN_MC_LINUX_SLACK_STEALING
 777 /* Release lowest-criticality tasks' servers with their userspace tasks,
778 * preventing them from turning into idle ghost tasks
779 */
780 if (tsk_mc_crit(task) == NUM_CRIT_LEVELS - 1)
781 release_server = 1;
782 else
783#endif
784 release_server = budget_exhausted(task);
785
786 if (release_server || forced) {
787 if (release_server)
788 sched_trace_server_completion(-task->pid,
789 get_rt_job(task));
790 /* Only unlink (and release resources) if the current server job
791 * must stop logically running
792 */
793 remove_from_all(task);
794 }
795
796 if (lt_before(get_user_release(task), litmus_clock()) ||
797 (release_server && tsk_rt(task)->completed)){
798 TRACE_TASK(task, "Executable task going back to running\n");
799 tsk_rt(task)->completed = 0;
800 }
801
802 if (release_server || forced) {
803 /* TODO: Level A does this independently and should not */
804 if (release_server && CRIT_LEVEL_A != tsk_mc_crit(task)) {
805 prepare_for_next_period(task);
806 }
807
808 TRACE_TASK(task, "Is released: %d, now: %llu, rel: %llu\n",
809 is_released(task, litmus_clock()), litmus_clock(),
810 get_release(task));
811
812 /* Requeue non-blocking tasks */
813 if (is_running(task)) {
814 job_arrival(task);
815 }
816 } else if (is_ghost(task)) {
817 entry = &per_cpu(cpus, tsk_rt(task)->linked_on);
818 ce = &entry->crit_entries[tsk_mc_crit(task)];
819
820 raw_spin_lock(&entry->lock);
821 if (ce->server.linked == task) {
822 /* The task went ghost while it was linked to a CPU */
823 link_task_to_cpu(entry, NULL);
824 stop_crit(ce);
825 if (ce->server.state == SS_ACTIVE)
826 start_crit(ce);
827 }
828 raw_spin_unlock(&entry->lock);
829 }
830}
831
832/**
833 * mc_ghost_exhausted() - Complete logically running ghost task.
834 */
835#ifdef CONFIG_MERGE_TIMERS
836static void mc_ghost_exhausted(struct rt_event *e)
837{
838 struct crit_entry *ce = container_of(e, struct crit_entry, event);
839#else
840static enum hrtimer_restart mc_ghost_exhausted(struct hrtimer *timer)
841{
842 struct crit_entry *ce = container_of(timer, struct crit_entry, timer);
843
844#endif
845 struct task_struct *tmp = NULL;
846 struct cpu_entry *entry = crit_cpu(ce);
847 TRACE("Firing here at %llu\n", litmus_clock());
848 TRACE_CRIT_ENTRY(ce, "For this\n");
849
850 raw_spin_lock(&entry->lock);
851
852 if (is_ghost(ce->server.linked)) {
853 update_server_time(ce->server.linked);
854 if (budget_exhausted(ce->server.linked)) {
855 tmp = ce->server.linked;
856 }
857 } else {
858 litmus_reschedule(crit_cpu(ce)->cpu);
859 }
860
861 raw_spin_unlock(&entry->lock);
862
863 if (tmp)
864 job_completion(tmp, 1);
865
866#ifndef CONFIG_MERGE_TIMERS
867 return HRTIMER_NORESTART;
868#endif
869}
870
871/*
872 * The MC-CE common timer callback code for merged and non-merged timers.
873 * Returns the next time the timer should fire.
874 */
875static lt_t __ce_timer_function(struct ce_dom_data *ce_data)
876{
877 struct crit_entry *ce = get_crit_entry_for(ce_data->cpu, CRIT_LEVEL_A);
878 struct domain *dom = ce->domain;
879 struct task_struct *old_link = NULL;
880 lt_t next_timer_abs;
881
882 TRACE("MC level-A timer callback for CPU %d\n", ce_data->cpu);
883
884 raw_spin_lock(dom->lock);
885
886 raw_spin_lock(&crit_cpu(ce)->lock);
887 if (ce->server.linked &&
888 ce->server.linked == ce_data->should_schedule)
889 {
890 old_link = ce->server.linked;
891 link_task_to_crit(ce, NULL);
892 mc_ce_job_completion(dom, old_link);
893 }
894 raw_spin_unlock(&crit_cpu(ce)->lock);
895
896 next_timer_abs = mc_ce_timer_callback_common(dom);
897
 898 /* Job completion will check for preemptions by calling job_arrival()
 899 * if the task is not blocked */
900 if (NULL != old_link) {
901 STRACE("old_link " TS " so will call job completion\n", TA(old_link));
902 raw_spin_unlock(dom->lock);
903 job_completion(old_link, 1);
904 } else {
905 STRACE("old_link was null, so will call check for preempt\n");
906 check_for_preempt(dom);
907 raw_spin_unlock(dom->lock);
908 }
909 return next_timer_abs;
910}
911
912#ifdef CONFIG_MERGE_TIMERS
913static void ce_timer_function(struct rt_event *e)
914{
915 struct ce_dom_data *ce_data =
916 container_of(e, struct ce_dom_data, event);
917 unsigned long flags;
918 lt_t next_timer_abs;
919
920 TS_LVLA_RELEASE_START;
921
922 local_irq_save(flags);
923 next_timer_abs = __ce_timer_function(ce_data);
924 add_event(per_cpu(cpus, ce_data->cpu).event_group, e, next_timer_abs);
925 local_irq_restore(flags);
926
927 TS_LVLA_RELEASE_END;
928}
929#else /* else to CONFIG_MERGE_TIMERS */
930static enum hrtimer_restart ce_timer_function(struct hrtimer *timer)
931{
932 struct ce_dom_data *ce_data =
933 container_of(timer, struct ce_dom_data, timer);
934 unsigned long flags;
935 lt_t next_timer_abs;
936
937 TS_LVLA_RELEASE_START;
938
939 local_irq_save(flags);
940 next_timer_abs = __ce_timer_function(ce_data);
941 hrtimer_set_expires(timer, ns_to_ktime(next_timer_abs));
942 local_irq_restore(flags);
943
944 TS_LVLA_RELEASE_END;
945
946 return HRTIMER_RESTART;
947}
948#endif /* CONFIG_MERGE_TIMERS */
949
950/**
951 * mc_release_jobs() - Add heap of tasks to the system, check for preemptions.
952 */
953static void mc_release_jobs(rt_domain_t* rt, struct bheap* tasks)
954{
955 unsigned long flags;
956 struct task_struct *first = bheap_peek(rt->order, tasks)->value;
957 struct domain *dom = get_task_domain(first);
958
959 raw_spin_lock_irqsave(dom->lock, flags);
960 TRACE(TS "Jobs released\n", TA(first));
961 __merge_ready(rt, tasks);
962 check_for_preempt(dom);
963 raw_spin_unlock_irqrestore(dom->lock, flags);
964}
965
966/**
 967 * mc_task_new() - Set up a new mixed-criticality task.
968 * Assumes that there are no partitioned domains after level B.
969 */
970static void mc_task_new(struct task_struct *t, int on_rq, int running)
971{
972 unsigned long flags;
973 int i;
974 struct cpu_entry* entry;
975 enum crit_level level = tsk_mc_crit(t);
976 struct dgl_group_req *req;
977 struct control_page *cp = tsk_rt(t)->ctrl_page;
978 struct color_ctrl_page *ccp = &tsk_rt(t)->color_ctrl_page;
979
980 local_irq_save(flags);
981 TRACE("New mixed criticality task %d\n", t->pid);
982
983 if (level == CRIT_LEVEL_A)
984 get_rt_relative_deadline(t) = get_exec_cost(t);
985
986 /* Assign domain */
987 if (level < CRIT_LEVEL_C)
988 entry = &per_cpu(cpus, get_partition(t));
989 else
990 entry = &per_cpu(cpus, task_cpu(t));
991 t->rt_param._domain = entry->crit_entries[level].domain;
992
993 tsk_rt(t)->flush = 0;
994 tsk_rt(t)->load = 0;
995
996 /* Userspace and kernelspace view of task state may differ.
997 * Model kernel state as a budget enforced container
998 */
999 sched_trace_container_param(t->pid, t->comm);
1000 sched_trace_server_param(-t->pid, t->pid,
1001 get_exec_cost(t), get_rt_period(t));
1002 server_init(&tsk_rt(t)->server, -t->pid,
1003 &tsk_rt(t)->job_params.job_no,
1004 NO_CPU);
1005 tsk_rt(t)->task_params.budget_policy = PRECISE_ENFORCEMENT;
1006
1007 BUG_ON(!tsk_rt(t)->server.job);
1008
1009 /* Apply chunking */
1010 if (level == CRIT_LEVEL_B && color_chunk &&
1011 lt_after(get_exec_cost(t), color_chunk)) {
1012 tsk_rt(t)->orig_cost = get_exec_cost(t);
1013 }
1014
1015 /* Setup color request */
1016 req = kmalloc(sizeof(*req), GFP_ATOMIC);
1017 req->task = t;
1018 tsk_rt(t)->req = req;
1019 if (cp && ccp) {
1020 TRACE_MC_TASK(t, "Initializing group request\n");
1021 cp->colors_updated = 0;
1022 dgl_group_req_init(&group_lock, req);
1023 for (i = 0; ccp->pages[i]; ++i)
1024 set_req(&group_lock, req, ccp->colors[i], ccp->pages[i]);
1025 } else {
1026 BUG_ON(CRIT_LEVEL_B == tsk_mc_crit(t));
1027 }
1028
1029 /* Setup job params */
1030 release_at(t, litmus_clock());
1031 if (running) {
1032 BUG_ON(entry->scheduled);
1033 TRACE_MC_TASK(t, "Was already running\n");
1034 entry->scheduled = t;
1035 tsk_rt(t)->scheduled_on = entry->cpu;
1036 tsk_rt(t)->last_exec_time = litmus_clock();
1037 } else {
1038 t->rt_param.scheduled_on = NO_CPU;
1039 }
1040 t->rt_param.linked_on = NO_CPU;
1041
1042 job_arrival(t);
1043
1044 local_irq_restore(flags);
1045}
1046
1047/**
 1048 * mc_task_wake_up() - Add a task back into its domain and check for preemptions.
1049 */
1050static void mc_task_wake_up(struct task_struct *task)
1051{
1052 unsigned long flags;
1053 lt_t now = litmus_clock();
1054 local_irq_save(flags);
1055
1056 TRACE(TS " wakes up\n", TA(task));
1057 if (is_tardy(task, now)) {
1058 /* Task missed its last release */
1059 release_at(task, now);
1060 sched_trace_task_release(task);
1061 }
1062
1063 if (budget_exhausted(task))
1064 /* Rare, but possible, race condition */
1065 job_completion(task, 1);
1066 else
1067 job_arrival(task);
1068
1069 local_irq_restore(flags);
1070}
1071
1072/**
 1074 * mc_task_block() - Remove the task from all state so it cannot be run anywhere.
1074 */
1075static void mc_task_block(struct task_struct *task)
1076{
1077 unsigned long flags;
1078 local_irq_save(flags);
1079 TRACE(TS " blocks\n", TA(task));
1080 remove_from_all(task);
1081 local_irq_restore(flags);
1082}
1083
1084/**
1085 * mc_task_exit() - Remove task from the system.
1086 */
1087static void mc_task_exit(struct task_struct *task)
1088{
1089 unsigned long flags;
1090 local_irq_save(flags);
1091 BUG_ON(!is_realtime(task));
1092 TRACE(TS " RIP\n", TA(task));
1093
1094 if (tsk_mc_crit(task) == CRIT_LEVEL_B && lock_cache) {
1095 color_sched_out_task(task);
1096 }
1097
1098 remove_from_all(task);
1099 if (tsk_rt(task)->scheduled_on != NO_CPU) {
1100 per_cpu(cpus, tsk_rt(task)->scheduled_on).scheduled = NULL;
1101 tsk_rt(task)->scheduled_on = NO_CPU;
1102 }
1103
 1104 /* TODO: restore. This was getting triggered by race conditions even when
1105 * no level-A task was executing */
1106 /* if (CRIT_LEVEL_A == tsk_mc_crit(task)) */
1107 /* mc_ce_task_exit_common(task); */
1108
1109 local_irq_restore(flags);
1110}
1111
1112/**
 1113 * mc_admit_task() - Return 0 if the task is valid, an error code otherwise.
1114 * Assumes there are no partitioned levels after level B.
1115 */
1116static long mc_admit_task(struct task_struct* task)
1117{
1118 const enum crit_level crit = tsk_mc_crit(task);
1119 long ret;
1120 if (!tsk_mc_data(task)) {
1121 printk(KERN_WARNING "Tried to admit task with no criticality "
1122 "level\n");
1123 ret = -EINVAL;
1124 goto out;
1125 }
1126 if (crit < CRIT_LEVEL_C && get_partition(task) == NO_CPU) {
1127 printk(KERN_WARNING "Tried to admit partitioned task with no "
1128 "partition\n");
1129 ret = -EINVAL;
1130 goto out;
1131 }
1132 /* if (crit < CRIT_LEVEL_C && get_partition(task) == interrupt_cpu) { */
1133 /* printk(KERN_WARNING "Tried to admit partitioned task on " */
1134 /* "the interrupt master\n"); */
1135 /* ret = -EINVAL; */
1136 /* goto out; */
1137 /* } */
1138 if (crit == CRIT_LEVEL_A) {
1139 ret = mc_ce_admit_task_common(task);
1140 if (ret)
1141 goto out;
1142 }
1143 printk(KERN_INFO "Admitted task with criticality level %d\n",
1144 tsk_mc_crit(task));
1145 ret = 0;
1146out:
1147 return ret;
1148}
1149
 1150/*
 1151 * Fill entry->linked, scanning criticality levels from highest to lowest.
 1152 * Caller must hold the entry lock; it is dropped and re-acquired inside.
 1153 */
1153void pick_next_task(struct cpu_entry *entry)
1154{
1155 int i;
1156 struct crit_entry *ce;
1157 struct domain *dom;
1158 struct task_struct *dtask, *ready_task;
1159 struct server *server;
1160
1161 STRACE("Picking next task\n");
1162
1163 for (i = 0; i < NUM_CRIT_LEVELS && !entry->linked; i++) {
1164 ce = &entry->crit_entries[i];
1165 dom = ce->domain;
1166 server = &ce->server;
1167
1168 /* Swap locks. We cannot acquire a domain lock while
1169 * holding an entry lock or deadlocks will happen
1170 */
1171 raw_spin_unlock(&entry->lock);
1172 raw_spin_lock(dom->lock);
1173
1174 /* Do domain stuff before grabbing CPU locks */
1175 dtask = dom->peek_ready(dom);
1176 fix_crit_position(ce);
1177
1178 raw_spin_lock(&entry->lock);
1179
1180 ready_task = NULL;
1181 if (!entry->linked && server->state == SS_ACTIVE) {
1182 if (ce->server.linked) {
1183 ready_task = ce->server.linked;
1184 } else if (dtask) {
1185 /* Need a new task */
1186 dom->take_ready(dom);
1187 ready_task = dtask;
1188
1189 link_task_to_crit(ce, dtask);
1190 update_crit_position(ce);
1191 }
1192 }
1193 if (ready_task && !is_ghost(ready_task) &&
1194 server->state == SS_ACTIVE) {
1195 link_task_to_cpu(entry, ready_task);
1196 raw_spin_unlock(dom->lock);
1197 update_crit_levels(entry);
1198 raw_spin_lock(&entry->lock);
1199 continue;
1200 }
1201 raw_spin_unlock(dom->lock);
1202 }
1203}
1204
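/*
 * Complete a deferred DGL state change. The DGL callbacks cpu_acquired() and
 * cpu_preempted() avoid touching CPU locks: they mark the task's server as
 * in transit, set the update signal via cpu_update() and trigger a
 * reschedule; the resulting server/CPU state transition is finished here,
 * from mc_schedule().
 */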
1205static void process_update_signal(struct cpu_entry *entry)
1206{
1207 int locked;
1208 struct crit_entry *ce;
1209 struct server *crit_server, *task_server;
1210 struct task_struct *linked;
1211
1212 STRACE("Reading update signal\n");
1213
1214 ce = &entry->crit_entries[CRIT_LEVEL_B];
1215
1216 /* Complete task state transitions */
1217 crit_server = &ce->server;
1218 if (!crit_server->linked) {
1219 return;
1220 }
1221
1222 linked = crit_server->linked;
1223 task_server = &tsk_rt(linked)->server;
1224 if (!task_server->in_transit) {
1225 return;
1226 }
1227
1228 raw_spin_lock(&dgl_lock);
1229
1230 /* Update and save lock state */
1231 update_group_req(&group_lock, tsk_rt(linked)->req);
1232 locked = has_resources(linked, entry->cpu);
1233
1234 raw_spin_unlock(&dgl_lock);
1235
1236 if (locked && crit_server->state != SS_ACTIVE) {
1237 TRACE_MC_TASK(linked, "Activated\n");
1238
1239 server_state_change(crit_server, SS_ACTIVE, 0);
1240 start_crit(ce);
1241 server_state_change(task_server, SS_ACTIVE, 0);
1242
1243 if (!is_ghost(linked)) {
1244 link_task_to_cpu(entry, linked);
1245 update_crit_levels(entry);
1246 raw_spin_lock(&entry->lock);
1247 }
1248 } else if (!locked && crit_server->state != SS_BLOCKED) {
1249 TRACE_MC_TASK(linked, "Blocked\n");
1250
1251 if (entry->linked == linked) {
1252 link_task_to_cpu(entry, NULL);
1253 }
1254
1255 server_state_change(task_server, SS_BLOCKED, 0);
1256 stop_crit(ce);
1257 server_state_change(crit_server, SS_BLOCKED, 0);
1258 }
1259
1260
1261}
1262
1263static void process_signals(struct cpu_entry *entry)
1264{
1265 struct domain *dom;
1266 struct crit_entry *ce;
1267 struct mc_signal signal;
1268 struct task_struct *preempted;
1269
1270 ce = &entry->crit_entries[CRIT_LEVEL_B];
1271 dom = ce->domain;
1272
1273 /* Load signals */
1274 raw_spin_lock(&entry->signal_lock);
1275 signal = entry->signal;
1276 clear_signal(&entry->signal);
1277 raw_spin_unlock(&entry->signal_lock);
1278
1279 if (signal.preempt) {
1280 raw_spin_lock(dom->lock);
1281 /* A higher-priority task may exist */
1282 STRACE("Reading preempt signal\n");
1283 dom->peek_ready(dom);
1284
1285 raw_spin_lock(&entry->lock);
1286
1287 if (ce->server.state == SS_ACTIVE &&
1288 mc_preempt_needed(ce->domain, ce->server.linked)) {
1289 preempted = preempt_crit(ce->domain, ce);
1290 raw_spin_unlock(dom->lock);
1291
1292 /* Can't requeue while we hold the entry lock, but
1293 * can't release that lock until state of lower-crit
1294 * servers is updated
1295 */
1296 if (!is_ghost(ce->server.linked)) {
1297 update_crit_levels(entry);
1298 } else {
1299 raw_spin_unlock(&entry->lock);
1300 }
1301
1302 if (preempted) {
1303 raw_spin_lock(dom->lock);
1304 dom->requeue(dom, preempted);
1305 raw_spin_unlock(dom->lock);
1306 }
1307
1308 raw_spin_lock(&entry->lock);
1309 } else {
1310 raw_spin_unlock(dom->lock);
1311 }
1312 } else {
1313 raw_spin_lock(&entry->lock);
1314 }
1315
1316 if (signal.update) {
1317 process_update_signal(entry);
1318 }
1319}
1320
1321/**
1322 * mc_schedule() - Return next task which should be scheduled.
1323 */
1324static struct task_struct* mc_schedule(struct task_struct* prev)
1325{
1326 lt_t start, exec;
1327 int out_of_time, sleep, preempt, exists, blocks, global, lower, work;
1328 struct cpu_entry* entry = &__get_cpu_var(cpus);
1329 struct task_struct *next = NULL;
1330
1331 /* Litmus gave up because it couldn't access the stack of the CPU
 1332 * that will_schedule was migrating from. Requeue it.
 1333 * This really only happens in VMs.
1334 */
1335 if (entry->will_schedule && entry->will_schedule != prev) {
1336 entry->will_schedule->rt_param.scheduled_on = NO_CPU;
1337 low_prio_arrival(entry->will_schedule);
1338 }
1339
1340 if (prev && tsk_rt(prev)->last_exec_time) {
1341 exec = litmus_clock() - tsk_rt(prev)->last_exec_time;
1342 tsk_rt(prev)->user_job.exec_time += exec;
1343 }
1344
1345 if (prev && tsk_mc_crit(prev) == CRIT_LEVEL_B &&
1346 is_realtime(prev) && get_rt_job(prev) > 1 && lock_cache) {
1347 start = litmus_clock();
1348 work = color_sched_out_task(prev);
1349 tsk_rt(prev)->flush = litmus_clock() - start;
1350 ++tsk_rt(prev)->flush_work;
1351 }
1352
1353 TS_LVLA_SCHED_START;
1354 TS_LVLB_SCHED_START;
1355 TS_LVLC_SCHED_START;
1356
1357 raw_spin_lock(&entry->lock);
1358
1359 BUG_ON(entry->scheduled && entry->scheduled != prev);
1360 BUG_ON(entry->scheduled && !is_realtime(prev));
1361 BUG_ON(prev && is_realtime(prev) && !entry->scheduled);
1362
1363 if (entry->scheduled != NULL) {
1364 entry->scheduled->rt_param.scheduled_on = NO_CPU;
1365 update_server_time(entry->scheduled);
1366 }
1367
1368 /* Determine state */
1369 exists = entry->scheduled != NULL;
1370 blocks = exists && !is_running(entry->scheduled);
1371 out_of_time = exists && budget_exhausted(entry->scheduled);
1372 sleep = exists && tsk_rt(entry->scheduled)->completed;
1373 global = exists && is_global_task(entry->scheduled);
1374 preempt = entry->scheduled != entry->linked;
1375 lower = exists && preempt && entry->linked &&
1376 tsk_mc_crit(entry->scheduled) > tsk_mc_crit(entry->linked);
1377
1378 TRACE(TS " block:%d oot:%d sleep:%d preempt:%d, now: %llu\n",
1379 TA(prev), blocks, out_of_time, sleep, preempt, litmus_clock());
1380
1381 raw_spin_unlock(&entry->lock);
1382
1383#ifdef CONFIG_PLUGIN_MC_REDIRECT
1384 if (smp_processor_id() == interrupt_cpu)
1385 fix_global_levels();
1386#endif
1387
1388 /* If a task blocks we have no choice but to reschedule */
1389 if (blocks)
1390 remove_from_all(entry->scheduled);
1391 /* Any task which exhausts its budget or sleeps waiting for its next
1392 * period completes unless its execution has been forcibly stopped
1393 */
1394 else if (out_of_time || sleep)/* && !preempt)*/
1395 job_completion(entry->scheduled, !sleep || preempt);
1396 /* Global scheduled tasks must wait for a deschedule before they
1397 * can rejoin the global state. Rejoin them here
1398 */
1399 else if (global && preempt) {
1400 if (lower)
1401 low_prio_arrival(entry->scheduled);
1402 else
1403 job_arrival(entry->scheduled);
1404 }
1405
1406 /* TODO: move this down somehow */
1407 sched_state_task_picked();
1408
1409 process_signals(entry);
1410
1411 /* Pick next task if none is linked */
1412 if (!entry->linked)
1413 pick_next_task(entry);
1414
1415 /* Schedule next task */
1416 next = entry->linked;
1417 if (next) {
1418 next->rt_param.scheduled_on = entry->cpu;
1419 }
1420 entry->will_schedule = next;
1421
1422 raw_spin_unlock(&entry->lock);
1423
1424 if (next) {
1425 switch (tsk_mc_crit(next)) {
1426 case CRIT_LEVEL_A: TS_LVLA_SCHED_END(next); break;
1427 case CRIT_LEVEL_B: TS_LVLB_SCHED_END(next); break;
1428 case CRIT_LEVEL_C: TS_LVLC_SCHED_END(next); break;
1429 }
1430 }
1431
1432 if (next && tsk_mc_crit(next) == CRIT_LEVEL_B && lock_cache && get_rt_job(next) > 1) {
1433 start = litmus_clock();
1434 work = color_sched_in_task(next);
1435 tsk_rt(next)->load = litmus_clock() - start;
1436 tsk_rt(next)->load_work = work;
1437 }
1438
1439 if (next) {
1440 tsk_rt(next)->last_exec_time = litmus_clock();
1441 TRACE_MC_TASK(next, "Picked this task\n");
1442 } else {
1443 STRACE("CPU %d idles at %llu\n", entry->cpu, litmus_clock());
1444 }
1445
1446 return next;
1447}
1448
1449void mc_finish_switch(struct task_struct *prev)
1450{
1451 struct cpu_entry* entry = &__get_cpu_var(cpus);
1452 entry->scheduled = is_realtime(current) ? current : NULL;
 1453 TRACE_TASK(prev, "Switched away; now scheduled: " TS "\n",
1454 TA(entry->scheduled));
1455}
1456
1457long mc_deactivate_plugin(void)
1458{
1459 return mc_ce_deactivate_plugin_common();
1460}
1461
1462static unsigned long long deadline_prio(struct dgl *dgl, struct dgl_group_req *greq)
1463{
1464 return get_deadline(greq->task);
1465}
1466
1467static void cpu_update(struct dgl_group_req *greq)
1468{
1469 struct cpu_entry *entry = &per_cpu(cpus, greq->cpu);
1470
1471 raw_spin_lock(&entry->signal_lock);
1472 entry->signal.update = 1;
1473 raw_spin_unlock(&entry->signal_lock);
1474
1475 litmus_reschedule(greq->cpu);
1476}
1477
1478/*
1479 * Setup and send signal to CPU for resource acquisition. To avoid touching
1480 * CPU locks, all CPU state modifications are delayed until the signal is
1481 * processed.
1482 */
1483static void cpu_acquired(struct dgl_group_req *greq)
1484{
1485 struct server *server = &tsk_rt(greq->task)->server;
1486
1487 TRACE_MC_TASK(greq->task, "Acquired CPU %d\n", greq->cpu);
1488
1489 sched_trace_task_resume(greq->task);
1490 server_state_change(server, SS_ACTIVE, 1);
1491
1492 cpu_update(greq);
1493}
1494
1495static void cpu_preempted(struct dgl_group_req *greq)
1496{
1497 struct server *server = &tsk_rt(greq->task)->server;
1498
1499 TRACE_MC_TASK(greq->task, "Dropping CPU %d\n", greq->cpu);
1500
1501 sched_trace_task_block(greq->task);
1502 server_state_change(server, SS_BLOCKED, 1);
1503
1504 cpu_update(greq);
1505}
1506
1507/* **************************************************************************
1508 * Initialization
1509 * ************************************************************************** */
1510
1511/* Initialize values here so that they are allocated with the module
1512 * and destroyed when the module is unloaded.
1513 */
1514
1515/* LVL-A */
1516DEFINE_PER_CPU(struct domain_data, _mc_crit_a);
1517DEFINE_PER_CPU(raw_spinlock_t, _mc_crit_a_lock);
1518DEFINE_PER_CPU(struct ce_dom_data, _mc_crit_a_ce_data);
1519/* LVL-B */
1520DEFINE_PER_CPU(struct domain_data, _mc_crit_b);
1521DEFINE_PER_CPU(rt_domain_t, _mc_crit_b_rt);
1522/* LVL-C */
1523static struct domain_data _mc_crit_c;
1524static rt_domain_t _mc_crit_c_rt;
1525struct bheap _mc_heap_c;
1526struct bheap_node _mc_nodes_c[NR_CPUS];
1527
1528static long mc_activate_plugin(void)
1529{
1530 struct domain_data *dom_data;
1531 struct domain *dom;
1532 struct domain_data *our_domains[NR_CPUS];
1533 rt_domain_t *rt_dom;
1534 int cpu, n = 0;
1535 long ret;
1536
1537 reset_way_tracker();
1538
1539 interrupt_cpu = atomic_read(&release_master_cpu);
1540
1541 for_each_online_cpu(cpu) {
1542 rt_dom = &per_cpu(_mc_crit_b_rt, cpu);
1543 /* rt_dom->release_master = cpu; */
1544 }
1545
1546 if (cache_preempt && !lock_cache) {
1547 printk(KERN_ERR "LITMUS-MC: specified cache preemption without "
1548 "enabling the locking protocol (lock_cache)\n");
1549 ret = -EINVAL;
1550 goto out;
1551 }
1552
1553 dgl_init(&group_lock, color_cache_info.nr_colors,
1554 color_cache_info.ways);
1555 if (cache_preempt) {
1556 group_lock.assign_priority = deadline_prio;
1557 group_lock.cpu_preempted = cpu_preempted;
1558 }
1559 group_lock.cpu_acquired = cpu_acquired;
1560
1561 for_each_online_cpu(cpu) {
1562 BUG_ON(NR_CPUS <= n);
1563 dom = per_cpu(cpus, cpu).crit_entries[CRIT_LEVEL_A].domain;
1564 dom_data = domain_data(dom);
1565 our_domains[cpu] = dom_data;
1566#if defined(CONFIG_MERGE_TIMERS) && defined(CONFIG_PLUGIN_MC_RELEASE_MASTER)
1567 per_cpu(cpus, cpu).event_group =
1568 get_event_group_for(interrupt_cpu);
1569#elif defined(CONFIG_MERGE_TIMERS) && !defined(CONFIG_PLUGIN_MC_RELEASE_MASTER)
1570 per_cpu(cpus, cpu).event_group = get_event_group_for(cpu);
1571#endif
1572 n++;
1573 }
1574 ret = mc_ce_set_domains(n, our_domains);
1575 if (ret)
1576 goto out;
1577 ret = mc_ce_activate_plugin_common();
1578out:
1579 return ret;
1580}
1581
1582static void mc_release_ts(lt_t time)
1583{
1584 int cpu, cont_id = -1;
1585 char name[TASK_COMM_LEN];
1586 enum crit_level level;
1587 struct cpu_entry *entry;
1588 struct crit_entry *ce;
1589
1590 level = CRIT_LEVEL_A;
1591 strcpy(name, "LVL-A");
1592 for_each_online_cpu(cpu) {
1593 /* if (cpu == interrupt_cpu) */
1594 /* continue; */
1595 entry = &per_cpu(cpus, cpu);
1596 sched_trace_container_param(++cont_id, (const char*)&name);
1597 ce = &entry->crit_entries[level];
1598 sched_trace_server_param(ce_sid(ce), cont_id, 0, 0);
1599 server_state_change(&ce->server, SS_ACTIVE, 0);
1600 }
1601
1602 level = CRIT_LEVEL_B;
1603 strcpy(name, "LVL-B");
1604 for_each_online_cpu(cpu) {
1605 /* if (cpu == interrupt_cpu) */
1606 /* continue; */
1607 entry = &per_cpu(cpus, cpu);
1608 sched_trace_container_param(++cont_id, (const char*)&name);
1609 ce = &entry->crit_entries[level];
1610 sched_trace_server_param(ce_sid(ce), cont_id, 0, 0);
1611 server_state_change(&ce->server, SS_ACTIVE, 0);
1612 }
1613
1614 level = CRIT_LEVEL_C;
1615 strcpy(name, "LVL-C");
1616 sched_trace_container_param(++cont_id, (const char*)&name);
1617 for_each_online_cpu(cpu) {
1618 entry = &per_cpu(cpus, cpu);
1619 ce = &entry->crit_entries[level];
1620 sched_trace_server_param(ce_sid(ce), cont_id, 0, 0);
1621 server_state_change(&ce->server, SS_ACTIVE, 0);
1622 }
1623
1624 mc_ce_release_at_common(NULL, time);
1625}
1626
1627static struct sched_plugin mc_plugin __cacheline_aligned_in_smp = {
1628 .plugin_name = "MC",
1629 .task_new = mc_task_new,
1630 .complete_job = complete_job,
1631 .task_exit = mc_task_exit,
1632 .schedule = mc_schedule,
1633 .task_wake_up = mc_task_wake_up,
1634 .task_block = mc_task_block,
1635 .admit_task = mc_admit_task,
1636 .activate_plugin = mc_activate_plugin,
1637 .release_at = release_at,
1638 .deactivate_plugin = mc_deactivate_plugin,
1639 .finish_switch = mc_finish_switch,
1640 .release_ts = mc_release_ts,
1641};
1642
1643static void init_crit_entry(struct cpu_entry *entry,
1644 struct crit_entry *ce, enum crit_level level,
1645 struct domain_data *dom_data,
1646 struct bheap_node *node)
1647{
1648
1649 ce->level = level;
1650 ce->server.linked = NULL;
1651 ce->node = node;
1652 ce->domain = &dom_data->domain;
1653 server_init(&ce->server, ce_sid(ce), 0, entry->cpu);
1654 ce->server.parent = -entry->cpu - 1;
1655#ifdef CONFIG_MERGE_TIMERS
1656 init_event(&ce->event, level, mc_ghost_exhausted,
1657 event_list_alloc(GFP_ATOMIC));
1658#else
1659 hrtimer_init(&ce->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1660 ce->timer.function = mc_ghost_exhausted;
1661#endif
1662
1663}
1664
1665static void init_local_domain(struct cpu_entry *entry, struct domain_data *dom_data,
1666 enum crit_level level)
1667{
1668 dom_data->heap = NULL;
1669 dom_data->crit_entry = &entry->crit_entries[level];
1670 init_crit_entry(entry, dom_data->crit_entry, level, dom_data, NULL);
1671}
1672
1673static void init_global_domain(struct domain_data *dom_data, enum crit_level level,
1674 struct bheap *heap, struct bheap_node *nodes)
1675{
1676 int cpu;
1677 struct cpu_entry *entry;
1678 struct crit_entry *ce;
1679 struct bheap_node *node;
1680
1681 dom_data->crit_entry = NULL;
1682 dom_data->heap = heap;
1683 bheap_init(heap);
1684
1685 for_each_online_cpu(cpu) {
1686 entry = &per_cpu(cpus, cpu);
1687 node = &nodes[cpu];
1688 ce = &entry->crit_entries[level];
1689 init_crit_entry(entry, ce, level, dom_data, node);
1690 bheap_node_init(&ce->node, ce);
1691 bheap_insert(cpu_lower_prio, heap, node);
1692 }
1693}
1694
1695static void init_edf_domain(struct domain *dom, rt_domain_t *rt,
1696 enum crit_level prio, int is_partitioned, int cpu)
1697{
1698 pd_domain_init(dom, rt, edf_ready_order, NULL,
1699 mc_release_jobs, edf_higher_prio);
1700 rt->level = prio;
1701#if defined(CONFIG_PLUGIN_MC_RELEASE_MASTER) && defined(CONFIG_MERGE_TIMERS)
1702 /* All timers are on one CPU and release-master is using the event
1703 * merging interface as well. */
1704 BUG_ON(NO_CPU == interrupt_cpu);
1705 rt->event_group = get_event_group_for(interrupt_cpu);
1706 rt->prio = prio;
1707#elif defined(CONFIG_PLUGIN_MC_RELEASE_MASTER) && !defined(CONFIG_MERGE_TIMERS)
1708 /* Using release master, but not merging timers. */
1709 /* rt->release_master = interrupt_cpu; */
1710#elif !defined(CONFIG_PLUGIN_MC_RELEASE_MASTER) && defined(CONFIG_MERGE_TIMERS)
1711 /* Merge the timers, but don't move them to the release master. */
1712 if (is_partitioned) {
1713 rt->event_group = get_event_group_for(cpu);
1714 } else {
1715 /* Global timers will be added to the event groups that code is
1716 * executing on when add_event() is called.
1717 */
1718 rt->event_group = NULL;
1719 }
1720 rt->prio = prio;
1721#endif
1722}
1723
1724static char* domain_name(const char *name, int cpu)
1725{
1726 char *buf = kmalloc(LITMUS_LOCKDEP_NAME_MAX_LEN * sizeof(char), GFP_ATOMIC);
1727 snprintf(buf, LITMUS_LOCKDEP_NAME_MAX_LEN, "%s%d", name, cpu);
1728 return buf;
1729}
1730
1731struct domain_data *ce_domain_for(int);
1732static int __init init_mc(void)
1733{
1734 int cpu;
1735 rt_domain_t *rt;
1736 raw_spinlock_t *a_dom_lock, *b_dom_lock, *c_dom_lock; /* For lock debugger */
1737 struct cpu_entry *entry;
1738 struct domain_data *dom_data;
1739 struct ce_dom_data *ce_data;
1740
1741 for_each_online_cpu(cpu) {
1742 entry = &per_cpu(cpus, cpu);
1743
1744 /* CPU */
1745 entry->cpu = cpu;
1746 entry->scheduled = NULL;
1747 entry->linked = NULL;
1748
1749 raw_spin_lock_init(&entry->lock);
1750 raw_spin_lock_init(&entry->signal_lock);
1751 clear_signal(&entry->signal);
1752
1753#ifdef CONFIG_PLUGIN_MC_REDIRECT
1754 raw_spin_lock_init(&entry->redir_lock);
1755 INIT_LIST_HEAD(&entry->redir);
1756#endif
1757
1758 /* CRIT_LEVEL_A */
1759 dom_data = &per_cpu(_mc_crit_a, cpu);
1760 ce_data = &per_cpu(_mc_crit_a_ce_data, cpu);
1761 a_dom_lock = &per_cpu(_mc_crit_a_lock, cpu);
1762 dom_data->domain.acquire_resources = dumb_acquire;
1763 dom_data->domain.release_resources = dumb_release;
1764 raw_spin_lock_init(a_dom_lock);
1765 ce_domain_init(&dom_data->domain,
1766 a_dom_lock, ce_requeue, ce_peek_and_take_ready,
1767 ce_peek_and_take_ready, ce_higher_prio, ce_data, cpu,
1768 ce_timer_function);
1769 init_local_domain(entry, dom_data, CRIT_LEVEL_A);
1770 dom_data->domain.name = domain_name("LVL-A", cpu);
1771
1772 /* CRIT_LEVEL_B */
1773 dom_data = &per_cpu(_mc_crit_b, cpu);
1774 rt = &per_cpu(_mc_crit_b_rt, cpu);
1775 init_local_domain(entry, dom_data, CRIT_LEVEL_B);
1776 init_edf_domain(&dom_data->domain, rt, CRIT_LEVEL_B, 1, cpu);
1777 dom_data->domain.acquire_resources = acquire_resources;
1778 dom_data->domain.release_resources = release_resources;
1779 b_dom_lock = dom_data->domain.lock;
1780 raw_spin_lock_init(b_dom_lock);
1781
1782 dom_data->domain.name = domain_name("LVL-B", cpu);
1783 }
1784
1785 /* CRIT_LEVEL_C */
1786 init_global_domain(&_mc_crit_c, CRIT_LEVEL_C,
1787 &_mc_heap_c, _mc_nodes_c);
1788 init_edf_domain(&_mc_crit_c.domain, &_mc_crit_c_rt, CRIT_LEVEL_C,
1789 0, NO_CPU);
1790 _mc_crit_c.domain.acquire_resources = dumb_acquire;
1791 _mc_crit_c.domain.release_resources = dumb_release;
1792 c_dom_lock = _mc_crit_c.domain.lock;
1793 raw_spin_lock_init(c_dom_lock);
1794 _mc_crit_c.domain.name = "LVL-C";
1795
1796
1797 /* GROUP LOCK */
1798 raw_spin_lock_init(&dgl_lock);
1799
1800 return register_sched_plugin(&mc_plugin);
1801}
1802
1803module_init(init_mc);