path: root/litmus/sched_mc.c
author     Jonathan Herman <hermanjl@cs.unc.edu>   2011-09-07 18:03:33 -0400
committer  Jonathan Herman <hermanjl@cs.unc.edu>   2011-09-07 18:03:33 -0400
commit     0b096fbe159a60c56190f8a627d764051e1e52ea (patch)
tree       89535a50e48ae92d0add444684ef28603ea0bf3f /litmus/sched_mc.c
parent     d5e965b0074d6ef10f5a77112fc3671613a2150f (diff)
Refactor to allow generic domains
Diffstat (limited to 'litmus/sched_mc.c')
-rw-r--r--  litmus/sched_mc.c  1624
1 file changed, 581 insertions, 1043 deletions
diff --git a/litmus/sched_mc.c b/litmus/sched_mc.c
index dcf86d60275a..bc4b46165b64 100644
--- a/litmus/sched_mc.c
+++ b/litmus/sched_mc.c
@@ -8,12 +8,6 @@
8 * This version uses the simple approach and serializes all scheduling 8 * This version uses the simple approach and serializes all scheduling
9 * decisions by the use of a queue lock. This is probably not the 9 * decisions by the use of a queue lock. This is probably not the
10 * best way to do it, but it should suffice for now. 10 * best way to do it, but it should suffice for now.
11 *
12 * --Todo--
13 * Timer Accounting: adjusting the clock values of the ghost timer using
14 * the _tick() method is relatively expensive. This should be changed.
15 * Locks: Accesses to domains should be serialized using a per-domain lock.
16 * CPU locks should be properly used e.g. wip-semi-part
17 */ 11 */
18 12
19#include <linux/spinlock.h> 13#include <linux/spinlock.h>
@@ -21,1106 +15,533 @@
21#include <linux/sched.h> 15#include <linux/sched.h>
22#include <linux/hrtimer.h> 16#include <linux/hrtimer.h>
23#include <linux/slab.h> 17#include <linux/slab.h>
18#include <linux/module.h>
24 19
25#include <litmus/litmus.h> 20#include <litmus/litmus.h>
26#include <litmus/jobs.h> 21#include <litmus/jobs.h>
27#include <litmus/sched_plugin.h> 22#include <litmus/sched_plugin.h>
28#include <litmus/edf_common.h> 23#include <litmus/edf_common.h>
29#include <litmus/sched_trace.h> 24#include <litmus/sched_trace.h>
30 25#include <litmus/domain.h>
31#include <litmus/bheap.h> 26#include <litmus/bheap.h>
32 27
33#include <linux/module.h>
34
35#include <litmus/sched_mc.h> 28#include <litmus/sched_mc.h>
36 29
37/* Overview of MC operations. 30/* Per CPU per criticality level state */
38 * 31typedef struct {
39 * link_task_to_cpu(T, cpu) - Low-level operation to update the linkage 32 enum crit_level level;
40 * structure (NOT the actually scheduled 33 struct task_struct* linked; /* Logically running task */
41 * task). If there is another linked task To 34 domain_t* domain;
42 * already it will set To->linked_on = NO_CPU
43 * (thereby removing its association with this
44 * CPU). However, it will not requeue the
45 * previously linked task (if any). It will set
46 * T's state to RT_F_RUNNING and check whether
47 * it is already running somewhere else. If T
48 * is scheduled somewhere else it will link
49 * it to that CPU instead (and pull the linked
50 * task to cpu). T may be NULL.
51 *
52 * unlink(T) - Unlink removes T from all scheduler data
53 * structures. If it is linked to some CPU it
54 * will link NULL to that CPU. If it is
55 * currently queued in the mc queue it will
56 * be removed from the rt_domain. It is safe to
57 * call unlink(T) if T is not linked. T may not
58 * be NULL.
59 *
60 * requeue(T) - Requeue will insert T into the appropriate
61 * queue. If the system is in real-time mode and
62 * the T is released already, it will go into the
63 * ready queue. If the system is not in
64 * real-time mode is T, then T will go into the
65 * release queue. If T's release time is in the
66 * future, it will go into the release
67 * queue. That means that T's release time/job
68 * no/etc. has to be updated before requeu(T) is
69 * called. It is not safe to call requeue(T)
70 * when T is already queued. T may not be NULL.
71 *
72 * mc_job_arrival(T) - This is the catch all function when T enters
73 * the system after either a suspension or at a
74 * job release. It will queue T (which means it
75 * is not safe to call mc_job_arrival(T) if
76 * T is already queued) and then check whether a
77 * preemption is necessary. If a preemption is
78 * necessary it will update the linkage
79 * accordingly and cause scheduled to be called
80 * (either with an IPI or need_resched). It is
81 * safe to call mc_job_arrival(T) if T's
82 * next job has not been actually released yet
83 * (releast time in the future). T will be put
84 * on the release queue in that case.
85 *
86 * job_completion(T) - Take care of everything that needs to be done
87 * to prepare T for its next release and place
88 * it in the right queue with
89 * mc_job_arrival().
90 *
91 *
92 * When we now that T is linked to CPU then link_task_to_cpu(NULL, CPU) is
93 * equivalent to unlink(T). Note that if you unlink a task from a CPU none of
94 * the functions will automatically propagate pending task from the ready queue
95 * to a linked task. This is the job of the calling function ( by means of
96 * __take_ready).
97 */
98
99
100/* cpu_entry_t - maintain the linked and scheduled state
101 */
102typedef struct {
103 int cpu;
104 struct task_struct* linked; /* only RT tasks */
105 struct task_struct* scheduled; /* only RT tasks */
106 atomic_t will_schedule; /* prevent unneeded IPIs */
107 struct bheap_node* hn_c;
108 struct bheap_node* hn_d;
109 struct task_struct* ghost_tasks[NUM_CRIT_LEVELS];
110} cpu_entry_t;
111
112/*This code is heavily based on Bjoern's budget enforcement code. */
113struct watchdog_timer {
114 /* The watchdog timers determine when ghost jobs finish. */
115 struct hrtimer timer;
116 struct task_struct* task;
117};
118
119DEFINE_PER_CPU(struct watchdog_timer[NUM_CRIT_LEVELS], ghost_timers);
120#define ghost_timer(cpu, crit) (&(per_cpu(ghost_timers, cpu)[crit]))
121
122DEFINE_PER_CPU(cpu_entry_t, mc_cpu_entries);
123 35
124cpu_entry_t* mc_cpus[NR_CPUS]; 36 struct hrtimer timer; /* For ghost task budget enforcement */
37 struct bheap_node* node; /* For membership in global domains */
38} crit_cpu_entry_t;
125 39
126#define set_will_schedule() \ 40/* Per CPU state */
127 (atomic_set(&__get_cpu_var(mc_cpu_entries).will_schedule, 1)) 41typedef struct {
128#define clear_will_schedule() \ 42 int cpu;
129 (atomic_set(&__get_cpu_var(mc_cpu_entries).will_schedule, 0)) 43 struct task_struct* scheduled; /* Task that is physically running */
130#define test_will_schedule(cpu) \ 44 struct task_struct* linked; /* Task that is logically running */
131 (atomic_read(&per_cpu(mc_cpu_entries, cpu).will_schedule))
132#define remote_cpu_entry(cpu) (&per_cpu(mc_cpu_entries, cpu))
133 45
134#define tsk_mc_data(t) (tsk_rt(t)->mc_data) 46 crit_cpu_entry_t crit_entries[NUM_CRIT_LEVELS];
135#define tsk_mc_crit(t) (tsk_mc_data(t)->mc_task.crit) 47} cpu_entry_t;
136#define TRACE_TASK(t, fmt, args...) \
137 TRACE("(%s/%d:%d:%d) " fmt, (t)->comm, (t)->pid, \
138 (t)->rt_param.job_params.job_no, \
139 (tsk_mc_data(t)) ? tsk_mc_crit(t) : -1, ##args)
140
141/* need to do a short-circuit null check on mc_data before checking is_ghost */
142static inline int is_ghost(struct task_struct *t)
143{
144 struct mc_data *mc_data = tsk_mc_data(t);
145 return mc_data && mc_data->mc_job.is_ghost;
146}
147
148/* the cpus queue themselves according to priority in here */
149static struct bheap_node mc_heap_node_c[NR_CPUS], mc_heap_node_d[NR_CPUS];
150static struct bheap mc_cpu_heap_c, mc_cpu_heap_d;
151
152/* Create per-CPU domains for criticality A */
153DEFINE_PER_CPU(rt_domain_t, crit_a);
154#define remote_a_queue(cpu) (&per_cpu(crit_a, cpu))
155#define local_a_queue (&__get_cpu_var(crit_a))
156
157/* Create per-CPU domains for criticality B */
158DEFINE_PER_CPU(rt_domain_t, crit_b);
159#define remote_b_queue(cpu) (&per_cpu(crit_b, cpu))
160#define local_b_queue (&__get_cpu_var(crit_b))
161
162/* Create global domains for criticalities C and D */
163static rt_domain_t crit_c;
164static rt_domain_t crit_d;
165
166/* We use crit_c for shared globals */
167#define global_lock (crit_c.ready_lock)
168#define mc_release_master (crit_c.release_master)
169
170/* BEGIN clone of edf_common.c to allow shared C/D run queue*/
171
172static int mc_edf_higher_prio(struct task_struct* first, struct task_struct*
173 second)
174{
175 /*Only differs from normal EDF when two tasks of differing criticality
176 are compared.*/
177 if (first && second) {
178 enum crit_level first_crit = tsk_mc_crit(first);
179 enum crit_level second_crit = tsk_mc_crit(second);
180 /*Lower criticality numbers are higher priority*/
181 if (first_crit < second_crit) {
182 return 1;
183 }
184 else if (second_crit < first_crit) {
185 return 0;
186 }
187 }
188 return edf_higher_prio(first, second);
189}
190 48
191static int mc_edf_entry_higher_prio(cpu_entry_t* first, cpu_entry_t* second, 49/* Wrapper necessary until cpu linking code is moved into header file */
192 enum crit_level crit) 50typedef struct domain_data {
193{ 51 domain_t domain;
194 struct task_struct *first_active, *second_active; 52 struct bheap* heap; /* For global domains */
195 first_active = first->linked; 53 crit_cpu_entry_t* crit_entry; /* For partitioned domains */
196 second_active = second->linked; 54} domain_data_t;
197 if (first->ghost_tasks[crit]) { 55
198 first_active = first->ghost_tasks[crit]; 56static cpu_entry_t* cpus[NR_CPUS];
199 } 57static raw_spinlock_t global_lock;
200 if (second->ghost_tasks[crit]) { 58
201 second_active = second->ghost_tasks[crit]; 59#define domain_data(dom) (container_of(dom, domain_data_t, domain))
202 } 60#define is_global(dom) (domain_data(dom)->heap)
203 return mc_edf_higher_prio(first_active, second_active); 61#define is_global_task(t) (is_global(get_task_domain(t)))
204} 62#define crit_cpu(ce) \
63 (container_of((void*)(ce - ce->level), cpu_entry_t, crit_entries))
64
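The crit_cpu() macro just above recovers the owning cpu_entry_t from a crit_cpu_entry_t pointer by exploiting the fact that each entry records its own index (level) into the crit_entries array. In case the pointer arithmetic is not obvious, here is a standalone sketch of the same idiom using hypothetical stand-in types (the real ones are cpu_entry_t and crit_cpu_entry_t):

#include <stddef.h>

/* Hypothetical stand-in types for illustration only. */
struct inner { int index;                /* this element's slot in outer.items */ };
struct outer { int cpu; struct inner items[4]; };

/* Step back from one array element (which knows its own index) to
 * items[0], then subtract the member offset to reach the enclosing
 * structure -- the same trick crit_cpu() plays with ce->level and
 * cpu_entry_t.crit_entries via container_of().
 */
static struct outer *owner_of(struct inner *e)
{
        struct inner *base = e - e->index;
        return (struct outer *)((char *)base - offsetof(struct outer, items));
}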
65#define TS "(%s/%d:%d:%s)"
66#define TA(t) (t) ? (is_ghost(t)) ? "ghost" : t->comm : "NULL", (t) ? t->pid : 1, \
67 (t) ? t->rt_param.job_params.job_no : 1, \
68 (t && get_task_domain(t)) ? get_task_domain(t)->name : ""
69#define TRACE_ENTRY(e, fmt, args...) \
70 TRACE("P%d, linked=" TS " " fmt "\n", \
71 e->cpu, TA(e->linked), ##args)
72#define TRACE_CRIT_ENTRY(ce, fmt, args...) \
73 TRACE("%s P%d, linked=" TS " " fmt "\n", \
74 ce->domain->name, crit_cpu(ce)->cpu, TA(ce->linked), ##args)
75#define TRACE_TASK(t, fmt, args...) \
76 TRACE(TS " " fmt "\n", TA(t), ##args)
205 77
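The domain_data_t wrapper and the macros above program against the generic domain interface introduced by this refactor. The following is only a reconstruction of that interface as inferred from the call sites in this file (dom->requeue(), dom->peek_ready(), dom->take_ready(), dom->higher_prio(), dom->preempt_needed(), dom->name, dom->data); the actual definition lives in <litmus/domain.h> and may differ:

/* NOT the real <litmus/domain.h> definition -- a sketch for orientation. */
struct task_struct;                     /* kernel task, declared elsewhere */

typedef struct domain_sketch {
        const char *name;               /* printed by the TRACE macros above */
        void       *data;               /* backing rt_domain_t, see remove_from_all() */

        void (*requeue)(struct domain_sketch *dom, struct task_struct *t);
        struct task_struct *(*peek_ready)(struct domain_sketch *dom);
        struct task_struct *(*take_ready)(struct domain_sketch *dom);
        int  (*higher_prio)(struct task_struct *a, struct task_struct *b);
        int  (*preempt_needed)(struct domain_sketch *dom, struct task_struct *curr);
} domain_sketch_t;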
206/* need_to_preempt - check whether the task t needs to be preempted 78/*
207 * call only with irqs disabled and with ready_lock acquired 79 * Sort CPUs within a global domain by the domain's priority function.
208 * THIS DOES NOT TAKE NON-PREEMPTIVE SECTIONS INTO ACCOUNT!
209 */ 80 */
210static int mc_edf_preemption_needed(rt_domain_t* rt, enum crit_level crit, 81static int cpu_lower_prio(struct bheap_node *a, struct bheap_node *b)
211 cpu_entry_t* entry)
212{ 82{
213 struct task_struct *active_task; 83 domain_t *domain;
84 crit_cpu_entry_t *first, *second;
85 struct task_struct *first_link, *second_link;
214 86
215 /* we need the read lock for edf_ready_queue */ 87 first = a->value;
216 /* no need to preempt if there is nothing pending */ 88 second = b->value;
217 if (!__jobs_pending(rt)) 89 first_link = first->linked;
218 return 0; 90 second_link = second->linked;
219 91
220 active_task = entry->linked; 92 if (!first_link || !second_link) {
221 /* A ghost task can only exist if we haven't scheduled something above 93 return second_link && !first_link;
222 * its level 94 } else {
223 */ 95 domain = get_task_domain(first_link);
224 if (entry->ghost_tasks[crit]) { 96 BUG_ON(domain != get_task_domain(second_link));
225 active_task = entry->ghost_tasks[crit]; 97 return domain->higher_prio(second_link, first_link);
226 } 98 }
227 /* we need to reschedule if t doesn't exist */
228 if (!active_task)
229 return 1;
230
231 /* NOTE: We cannot check for non-preemptibility since we
232 * don't know what address space we're currently in.
233 */
234
235 /* make sure to get non-rt stuff out of the way */
236 return !is_realtime(active_task) ||
237 mc_edf_higher_prio(__next_ready(rt), active_task);
238}
239
240static int mc_edf_ready_order(struct bheap_node* a, struct bheap_node* b)
241{
242 return mc_edf_higher_prio(bheap2task(a), bheap2task(b));
243}
244
245static void mc_edf_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
246 release_jobs_t release)
247{
248 rt_domain_init(rt, mc_edf_ready_order, resched, release);
249}
250
251#define WANT_ALL_SCHED_EVENTS
252
253/* Called by update_cpu_position and lowest_prio_cpu in bheap operations
254 * Callers always have global lock
255*/
256static int cpu_lower_prio_c(struct bheap_node *_a, struct bheap_node *_b)
257{
258 cpu_entry_t *a, *b;
259 a = _a->value;
260 b = _b->value;
261 /* Note that a and b are inverted: we want the lowest-priority CPU at
262 * the top of the heap.
263 */
264 return mc_edf_entry_higher_prio(b, a, CRIT_LEVEL_C);
265}
266
267/* Called by update_cpu_position and lowest_prio_cpu in bheap operations
268 * Callers always have global lock
269*/
270static int cpu_lower_prio_d(struct bheap_node *_a, struct bheap_node *_b)
271{
272 cpu_entry_t *a, *b;
273 a = _a->value;
274 b = _b->value;
275 /* Note that a and b are inverted: we want the lowest-priority CPU at
276 * the top of the heap.
277 */
278 return mc_edf_entry_higher_prio(b, a, CRIT_LEVEL_D);
279}
280
281/* update_cpu_position - Move the cpu entry to the correct place to maintain
282 * order in the cpu queue. Caller must hold global lock.
283 * Called from link_task_to_cpu, which holds global lock
284 * link_task_to_cpu is the only way a CPU can get a new task, and hence have its
285 * priority change.
286 */
287static void update_cpu_position(cpu_entry_t *entry)
288{
289 if (likely(bheap_node_in_heap(entry->hn_c)))
290 bheap_delete(cpu_lower_prio_c, &mc_cpu_heap_c, entry->hn_c);
291 if (likely(bheap_node_in_heap(entry->hn_d)))
292 bheap_delete(cpu_lower_prio_d, &mc_cpu_heap_d, entry->hn_d);
293 bheap_insert(cpu_lower_prio_c, &mc_cpu_heap_c, entry->hn_c);
294 bheap_insert(cpu_lower_prio_d, &mc_cpu_heap_d, entry->hn_d);
295} 99}
296 100
297/* caller must hold global lock 101/*
298 * Only called when checking for gedf preemptions by check_for_gedf_preemptions, 102 * Return next CPU which should preempted or NULL if the domain has no
299 * which always has global lock 103 * preemptable CPUs.
300 */
301static cpu_entry_t* lowest_prio_cpu_c(void)
302{
303 struct bheap_node* hn;
304 hn = bheap_peek(cpu_lower_prio_c, &mc_cpu_heap_c);
305 return hn->value;
306}
307
308/* caller must hold global lock
309 * Only called when checking for gedf preemptions by check_for_gedf_preemptions,
310 * which always has global lock
311 */ 104 */
312static cpu_entry_t* lowest_prio_cpu_d(void) 105static inline crit_cpu_entry_t* lowest_prio_cpu(domain_t *dom)
313{ 106{
107 struct bheap *heap = domain_data(dom)->heap;
314 struct bheap_node* hn; 108 struct bheap_node* hn;
315 hn = bheap_peek(cpu_lower_prio_d, &mc_cpu_heap_d); 109 hn = bheap_peek(cpu_lower_prio, heap);
316 return hn->value; 110 return (hn) ? hn->value : NULL;
317} 111}
318 112
319/* Forward Declarations*/ 113/*
320static noinline void unlink(struct task_struct* t); 114 * Time accounting for ghost tasks. Called during ticks and linking.
321static noinline void job_completion(struct task_struct *t, int forced);
322
323/* update_ghost_time - Do time accounting for a ghost job.
324 * Updates ghost budget and handles expired ghost budget.
325 * Called from unlink(), mc_tick().
326 * Caller holds global lock.
327 */ 115 */
328static void update_ghost_time(struct task_struct *p) 116static void update_ghost_time(struct task_struct *p)
329{ 117{
330 u64 delta; 118 u64 delta, clock;
331 u64 clock;
332 119
333 BUG_ON(!is_ghost(p)); 120 BUG_ON(!is_ghost(p));
334 clock = litmus_clock(); 121 clock = litmus_clock();
335 delta = clock - p->se.exec_start; 122 delta = clock - p->se.exec_start;
336 if (unlikely ((s64)delta < 0)) { 123 if (unlikely ((s64)delta < 0)) {
337 delta = 0; 124 delta = 0;
338 TRACE_TASK(p, "WARNING: negative time delta.\n"); 125 TRACE_TASK(p, "WARNING: negative time delta");
339 } 126 }
340 if (tsk_mc_data(p)->mc_job.ghost_budget <= delta) { 127 if (tsk_mc_data(p)->mc_job.ghost_budget <= delta) {
341 /* Currently will just set ghost budget to zero since 128 TRACE_TASK(p, "Ghost job could have ended");
342 * task has already been queued. Could probably do
343 * more efficiently with significant reworking.
344 */
345 TRACE_TASK(p, "Ghost job could have ended\n");
346 tsk_mc_data(p)->mc_job.ghost_budget = 0; 129 tsk_mc_data(p)->mc_job.ghost_budget = 0;
347 p->se.exec_start = clock; 130 p->se.exec_start = clock;
348 } else { 131 } else {
349 TRACE_TASK(p, "Ghost jub updated, but didn't finish\n"); 132 TRACE_TASK(p, "Ghost job updated, but didn't finish");
350 tsk_mc_data(p)->mc_job.ghost_budget -= delta; 133 tsk_mc_data(p)->mc_job.ghost_budget -= delta;
351 p->se.exec_start = clock; 134 p->se.exec_start = clock;
352 } 135 }
353} 136}
354 137
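update_ghost_time() charges elapsed wall-clock time against the linked ghost job's remaining budget, clamping negative deltas and saturating at zero. A minimal user-space analogue of that bookkeeping, with hypothetical names (the kernel code uses litmus_clock() and p->se.exec_start for the same purpose):

static void charge_ghost(unsigned long long *budget_ns,
                         unsigned long long *last_update_ns,
                         unsigned long long now_ns)
{
        long long delta = (long long)(now_ns - *last_update_ns);

        if (delta < 0)
                delta = 0;                      /* clock anomaly: charge nothing */
        if (*budget_ns <= (unsigned long long)delta)
                *budget_ns = 0;                 /* ghost execution is exhausted */
        else
                *budget_ns -= (unsigned long long)delta;
        *last_update_ns = now_ns;               /* start of the next charging span */
}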
355/* 138/*
356 * 139 * Logically set running task for a domain on a CPU.
357 */ 140 */
358static void cancel_watchdog_timer(struct watchdog_timer* wt) 141static void link_task_to_crit(crit_cpu_entry_t *ce,
142 struct task_struct *task)
359{ 143{
360 int ret; 144 lt_t when_to_fire;
361 145 struct bheap *heap;
362 if (wt->task) { 146
363 TRACE_TASK(wt->task, "Cancelling watchdog timer.\n"); 147 TRACE_TASK(task, "Linking to P%d", crit_cpu(ce)->cpu);
364 ret = hrtimer_try_to_cancel(&wt->timer); 148 BUG_ON(task && tsk_rt(task)->linked_on != NO_CPU);
365 /*Should never be inactive.*/ 149 BUG_ON(task && is_global(ce->domain) &&
366 BUG_ON(ret == 0); 150 !bheap_node_in_heap(ce->node));
367 /*Running concurrently is an unusual situation - log it. */ 151
368 /*TODO: is there a way to prevent this? This probably means 152 /* Unlink last task */
369 * the timer task is waiting to acquire the lock while the 153 if (ce->linked) {
370 * cancellation attempt is happening. 154 TRACE_TASK(ce->linked, "Unlinking");
371 * 155 ce->linked->rt_param.linked_on = NO_CPU;
372 * And are we even in a correct state when this happens? 156 if (is_ghost(ce->linked)) {
373 */ 157 hrtimer_try_to_cancel(&ce->timer);
374 if (ret == -1) 158 if (tsk_mc_data(ce->linked)->mc_job.ghost_budget > 0) {
375 TRACE_TASK(wt->task, "Timer cancellation " 159 /* Job isn't finished, so do accounting */
376 "attempted while task completing\n"); 160 update_ghost_time(ce->linked);
161 }
162 }
163 }
164
165 /* Actually link task */
166 ce->linked = task;
167 if (task) {
168 task->rt_param.linked_on = crit_cpu(ce)->cpu;
169 if (is_ghost(task)) {
170 /* Reset budget timer */
171 task->se.exec_start = litmus_clock();
172 when_to_fire = litmus_clock() +
173 tsk_mc_data(task)->mc_job.ghost_budget;
174 __hrtimer_start_range_ns(&ce->timer,
175 ns_to_ktime(when_to_fire),
176 0 /* delta */,
177 HRTIMER_MODE_ABS_PINNED,
178 0 /* no wakeup */);
179 }
180 }
377 181
378 wt->task = NULL; 182 /* Update global heap node position */
183 if (is_global(ce->domain) && bheap_node_in_heap(ce->node)) {
184 heap = domain_data(ce->domain)->heap;
185 bheap_delete(cpu_lower_prio, heap, ce->node);
186 bheap_insert(cpu_lower_prio, heap, ce->node);
379 } 187 }
380} 188}
381 189
382/* link_task_to_cpu - Update the link of a CPU. 190static void check_for_preempt(domain_t*);
383 * Handles the case where the to-be-linked task is already 191
384 * scheduled on a different CPU. 192/*
385 * Also handles ghost jobs and preemption of ghost jobs. 193 * Catch all function for when a task enters the system after a suspension
386 * Called from unlink(), prepare_preemption(), and mc_schedule() 194 * or a release. Requeues the task and causes a preemption, if necessary.
387 * Callers hold global lock
388 */ 195 */
389static noinline void link_task_to_cpu(struct task_struct* linked, 196static void job_arrival(struct task_struct* task)
390 cpu_entry_t *entry)
391{ 197{
392 cpu_entry_t *sched; 198 domain_t *dom = get_task_domain(task);
393 struct task_struct* tmp;
394 int on_cpu;
395 int i;
396 struct watchdog_timer* timer;
397 lt_t when_to_fire;
398 199
399 BUG_ON(linked && !is_realtime(linked)); 200 TRACE_TASK(task, "Job arriving");
400 BUG_ON(linked && is_realtime(linked) && 201 BUG_ON(!task);
401 (tsk_mc_crit(linked) < CRIT_LEVEL_C) &&
402 (tsk_rt(linked)->task_params.cpu != entry->cpu));
403
404 if (linked && is_ghost(linked)) {
405 TRACE_TASK(linked, "Linking ghost job to CPU %d.\n",
406 entry->cpu);
407 BUG_ON(entry->linked &&
408 tsk_mc_crit(entry->linked) < tsk_mc_crit(linked));
409 202
410 tmp = entry->ghost_tasks[tsk_mc_crit(linked)]; 203 if (!is_global(dom) || tsk_rt(task)->scheduled_on == NO_CPU) {
411 if (tmp) { 204 dom->requeue(dom, task);
412 unlink(tmp); 205 check_for_preempt(dom);
413 } 206 } else {
414 /* We shouldn't link a ghost job that is already somewhere 207 /* If a global task is scheduled on one cpu, it CANNOT
415 * else (or here) - the caller is responsible for unlinking] 208 * be requeued into a global domain. Another cpu might
416 * first. 209 * dequeue the global task before it is descheduled,
210 * causing the system to crash when the task is scheduled
211 * in two places simultaneously.
417 */ 212 */
418 BUG_ON(linked->rt_param.linked_on != NO_CPU); 213 TRACE_TASK(task, "Delayed arrival of scheduled task");
419 linked->rt_param.linked_on = entry->cpu;
420 linked->se.exec_start = litmus_clock();
421 entry->ghost_tasks[tsk_mc_crit(linked)] = linked;
422 /* Set up the watchdog timer. */
423 timer = ghost_timer(entry->cpu, tsk_mc_crit(linked));
424 if (timer->task) {
425 cancel_watchdog_timer(timer);
426 }
427 when_to_fire = litmus_clock() +
428 tsk_mc_data(linked)->mc_job.ghost_budget;
429 timer->task = linked;
430 __hrtimer_start_range_ns(&timer->timer,
431 ns_to_ktime(when_to_fire),
432 0 /* delta */,
433 HRTIMER_MODE_ABS_PINNED,
434 0 /* no wakeup */);
435 } 214 }
436 else{ 215}
437 /* Currently linked task is set to be unlinked. */ 216
438 if (entry->linked) { 217/*
439 entry->linked->rt_param.linked_on = NO_CPU; 218 * Logically run a task on a CPU. The task must first have been linked
219 * to one of the criticalities running on this CPU.
220 */
221static void link_task_to_cpu(cpu_entry_t *entry, struct task_struct *task)
222{
223 int i, in_heap;
224 crit_cpu_entry_t *ce;
225 struct bheap *heap;
226 struct task_struct *tmp;
227 enum crit_level last, next;
228
229 next = (task) ? tsk_mc_crit(task) : NUM_CRIT_LEVELS - 1;
230 last = (entry->linked) ? tsk_mc_crit(entry->linked) :
231 NUM_CRIT_LEVELS - 1;
232
233 TRACE_TASK(task, "Linking to P%d", entry->cpu);
234 BUG_ON(task && tsk_rt(task)->linked_on != entry->cpu);
235 BUG_ON(task && is_ghost(task));
236 BUG_ON(entry->linked && task && tsk_mc_crit(entry->linked) < next);
237
238 /* Actually link task */
239 if (task && !is_ghost(task)) {
240 set_rt_flags(task, RT_F_RUNNING);
241 entry->linked = task;
242 } else {
243 entry->linked = NULL;
244 }
245
246 /* Update CPU states */
247 for (i = ((next < last) ? next : last);
248 i <= ((next > last) ? next : last); i++) {
249 ce = &entry->crit_entries[i];
250
251 /* Put CPU only in heaps which can preempt the linked task */
252 if (is_global(ce->domain)) {
253 heap = domain_data(ce->domain)->heap;
254 in_heap = bheap_node_in_heap(ce->node);
255 if (ce->level > next && in_heap) {
256 bheap_delete(cpu_lower_prio, heap, ce->node);
257 } else if ((ce->level < next || !task) && !in_heap) {
258 bheap_insert(cpu_lower_prio, heap, ce->node);
259 }
440 } 260 }
441 261
442 /* Link new task to CPU. */ 262 /* Remove and requeue lower priority tasks on this CPU */
443 if (linked) { 263 if (ce->linked && ce->level > next) {
444 set_rt_flags(linked, RT_F_RUNNING); 264 TRACE_TASK(ce->linked, "Removed by higher priority");
445 /* handle task is already scheduled somewhere! */ 265 tmp = ce->linked;
446 on_cpu = linked->rt_param.scheduled_on; 266 link_task_to_crit(ce, NULL);
447 if (on_cpu != NO_CPU) { 267 if (is_global(ce->domain)) {
448 sched = &per_cpu(mc_cpu_entries, on_cpu); 268 /* Need to check for a preemption.
449 /* this should only happen if not linked 269 * We know this CPU is no longer in the heap
450 * already 270 * so it cannot get re-preempted here.
451 */
452 BUG_ON(sched->linked == linked);
453
454 /* If we are already scheduled on the CPU to
455 * which we wanted to link, we don't need to do
456 * the swap -- we just link ourselves to the
457 * CPU and depend on the caller to get things
458 * right.
459 *
460 * Also, we can only safely swap if neither
461 * task is partitioned.
462 */ 271 */
463 tmp = sched->linked; 272 job_arrival(tmp);
464 if (entry != sched && tsk_mc_crit(linked) > 273 } else {
465 CRIT_LEVEL_B && 274 ce->domain->requeue(ce->domain, tmp);
466 (!tmp || tsk_mc_crit(tmp)
467 > CRIT_LEVEL_B)) {
468 TRACE_TASK(linked,
469 "already scheduled on %d, updating link.\n",
470 sched->cpu);
471 linked->rt_param.linked_on = sched->cpu;
472 sched->linked = linked;
473 for (i = tsk_mc_crit(linked);
474 i < NUM_CRIT_LEVELS; i++) {
475 if (sched->ghost_tasks[i]) {
476 unlink(sched->
477 ghost_tasks[i]);
478 }
479 }
480 update_cpu_position(sched);
481 linked = tmp;
482 }
483 }
484 if (linked) { /* might be NULL due to swap */
485 linked->rt_param.linked_on = entry->cpu;
486 for (i = tsk_mc_crit(linked);
487 i < NUM_CRIT_LEVELS; i++) {
488 if (entry->ghost_tasks[i]) {
489 unlink(entry->ghost_tasks[i]);
490 /* WARNING: it is up to the
491 * caller to requeue ghost jobs
492 */
493 }
494 }
495 } 275 }
496 } 276 }
497 entry->linked = linked;
498 } 277 }
499#ifdef WANT_ALL_SCHED_EVENTS
500 if (linked)
501 TRACE_TASK(linked, "linked to %d.\n", entry->cpu);
502 else
503 TRACE("NULL linked to %d.\n", entry->cpu);
504#endif
505 update_cpu_position(entry);
506} 278}
507 279
508/* unlink - Make sure a task is not linked any longer to a cpu entry 280/*
509 * where it was linked before. 281 * Preempt logically running task in a domain. If the preempting task should be
510 * Can handle ghost jobs. 282 * running on the domain's CPU, also links the task to the CPU and causes
511 * Called by schedule, task_block, task_exit, and job_completion 283 * a physical preemption.
512 * Caller assumed to hold global lock
513 */ 284 */
514static noinline void unlink(struct task_struct* t) 285static void preempt(domain_t *dom, crit_cpu_entry_t *ce)
515{ 286{
516 int cpu; 287 struct task_struct *task = dom->take_ready(dom);
517 cpu_entry_t *entry; 288 cpu_entry_t *entry = crit_cpu(ce);
518 struct watchdog_timer *timer; 289
290 TRACE_CRIT_ENTRY(ce, "Preempted by " TS, TA(task));
291 BUG_ON(!task);
519 292
520 if (unlikely(!t)) { 293 /* Per-domain preemption */
521 BUG_ON(1); 294 if (ce->linked) {
522 return; 295 dom->requeue(dom, ce->linked);
523 } 296 }
297 link_task_to_crit(ce, task);
524 298
525 cpu = t->rt_param.linked_on; 299 /* Preempt actual execution if this is a running task */
526 if (cpu != NO_CPU) { 300 if (!is_ghost(task)) {
527 /* unlink */ 301 link_task_to_cpu(entry, task);
528 entry = remote_cpu_entry(cpu); 302 preempt_if_preemptable(entry->scheduled, entry->cpu);
529 t->rt_param.linked_on = NO_CPU;
530 if (is_ghost(t)) {
531 /* Clear the timer if it's set.
532 * It may be unset if we are called as a result of
533 * the watchdog timer triggering.
534 */
535 timer = ghost_timer(cpu, tsk_mc_crit(t));
536 if (timer->task) {
537 /* Should already be watching task.*/
538 BUG_ON(timer->task != t);
539 cancel_watchdog_timer(timer);
540 }
541 if (tsk_mc_data(t)->mc_job.ghost_budget > 0) {
542 /* Job isn't finished, so do accounting. */
543 update_ghost_time(t);
544 /* Just remove from CPU, even in the rare case
545 * of zero time left - it will be scheduled
546 * with an immediate timer fire.
547 */
548 entry->ghost_tasks[tsk_mc_crit(t)] = NULL;
549 /*TODO: maybe make more efficient by
550 * only updating on C/D completion?
551 */
552 update_cpu_position(entry);
553 }
554 else{
555 /* Job finished, so just remove */
556 entry->ghost_tasks[tsk_mc_crit(t)] = NULL;
557 update_cpu_position(entry);
558 }
559 }
560 else {
561 link_task_to_cpu(NULL, entry);
562 }
563 } else if (is_queued(t)) {
564 /* This is an interesting situation: t is scheduled,
565 * but was just recently unlinked. It cannot be
566 * linked anywhere else (because then it would have
567 * been relinked to this CPU), thus it must be in some
568 * queue. We must remove it from the list in this
569 * case.
570 */
571 TRACE("Weird is_queued situation happened\n");
572 remove(tsk_rt(t)->domain, t);
573 } 303 }
574} 304}
575 305
576 306/*
577/* preempt - force a CPU to reschedule 307 * Causes a logical preemption if the domain has a higher-priority ready task.
578 * Just sets a Linux scheduler flag.
579 */ 308 */
580static void preempt(cpu_entry_t *entry) 309static void check_for_preempt(domain_t *dom)
581{ 310{
582 preempt_if_preemptable(entry->scheduled, entry->cpu); 311 int lower_prio;
312 cpu_entry_t *entry;
313 crit_cpu_entry_t *ce;
314
315 if (is_global(dom)) {
316 /* If a higher priority task is running on a CPU,
317 * it will not be present in the domain heap.
318 */
319 for (ce = lowest_prio_cpu(dom);
320 ce && dom->preempt_needed(dom, ce->linked);
321 ce = lowest_prio_cpu(dom)) {
322
323 preempt(dom, ce);
324 }
325 } else /* Partitioned */ {
326 ce = domain_data(dom)->crit_entry;
327 entry = crit_cpu(ce);
328 /* A higher priority task might be running, in which case
329 * this level cannot link any task.
330 */
331 lower_prio = entry->linked &&
332 tsk_mc_crit(entry->linked) < ce->level;
333 if (!lower_prio && dom->preempt_needed(dom, ce->linked)) {
334 preempt(dom, ce);
335 }
336 }
583} 337}
584 338
585/* requeue - Put an unlinked task into the proper domain. 339/*
586 * Caller holds global lock. 340 * Remove a running task from all structures.
587 * Called by mc_job_arrival() and prepare_preemption().
588 */ 341 */
589static noinline void requeue(struct task_struct* task) 342static void remove_from_all(struct task_struct* task)
590{ 343{
591 BUG_ON(!task || !is_realtime(task)); 344 int cpu, level;
592 /* sanity check before insertion */ 345 cpu_entry_t *entry;
593 BUG_ON(is_queued(task)); 346 crit_cpu_entry_t *ce;
594 347
595 if (is_released(task, litmus_clock())) { 348 TRACE_TASK(task, "Removing from everything");
596 __add_ready(tsk_rt(task)->domain, task); 349 BUG_ON(!task);
597 } else {
598 /* it has got to wait */
599 add_release(tsk_rt(task)->domain, task);
600 }
601}
602 350
603static void prepare_preemption(rt_domain_t *dom, cpu_entry_t *cpu, 351 cpu = task->rt_param.linked_on;
604 enum crit_level crit) { 352 level = tsk_mc_crit(task);
605 struct task_struct* task; 353 if (cpu != NO_CPU) {
606 int i; 354 /* Unlink */
607 task = __take_ready(dom); 355 entry = cpus[cpu];
608 TRACE("prepare_preemption: attempting to link task %d to %d\n", 356 ce = &entry->crit_entries[level];
609 task->pid, cpu->cpu); 357 link_task_to_crit(ce, NULL);
610 if (is_ghost(task)) { 358 if (!is_ghost(task)) {
611 /* Changing ghost task only affects linked task at our level */ 359 link_task_to_cpu(entry, NULL);
612 if (cpu->linked && tsk_mc_crit(cpu->linked) == crit)
613 requeue(cpu->linked);
614 /* Can change ghost task at our level as well. */
615 if (cpu->ghost_tasks[crit])
616 requeue(cpu->ghost_tasks[crit]);
617 }
618 else {
619 /* Changing linked tasks could affect both real and ghost
620 * tasks at multiple levels
621 */
622 if (cpu->linked)
623 requeue(cpu->linked);
624 for (i = crit; i < NUM_CRIT_LEVELS; i++) {
625 if (cpu->ghost_tasks[i])
626 requeue(cpu->ghost_tasks[i]);
627 } 360 }
361 BUG_ON(is_queued(task));
362 } else if (is_queued(task)) {
363 /* This is an interesting situation: t is scheduled,
364 * but was just recently unlinked. It cannot be
365 * linked anywhere else (because then it would have
366 * been relinked to this CPU), thus it must be in some
367 * queue. We must remove it from the list in this
368 * case.
369 */
370 TRACE_TASK(task, "Weird is_queued situation happened");
371 remove((rt_domain_t*)get_task_domain(task)->data, task);
628 } 372 }
629 link_task_to_cpu(task, cpu);
630 preempt(cpu);
631} 373}
632 374
633/* Callers always have global lock for functions in this section*/ 375/*
634static noinline void check_for_c_preemptions(rt_domain_t *dom) { 376 * Prepares a task for its next period and causes a preemption, if necessary.
635 cpu_entry_t* last; 377 * Converts tasks which completed their execution early into ghost tasks.
636 TRACE("Checking for c preempt"); 378 */
637 for (last = lowest_prio_cpu_c(); 379static void job_completion(struct task_struct *task, int forced)
638 mc_edf_preemption_needed(dom, CRIT_LEVEL_C, 380{
639 last); 381 TRACE_TASK(task, "Completed");
640 last = lowest_prio_cpu_c()) { 382 sched_trace_task_completion(task, forced);
641 prepare_preemption(dom, last, CRIT_LEVEL_C); 383 BUG_ON(!task);
642 }
643}
644 384
645static noinline void check_for_d_preemptions(rt_domain_t *dom) { 385 /* Logically stop the task execution */
646 cpu_entry_t* last; 386 set_rt_flags(task, RT_F_SLEEP);
647 TRACE("Checking for d preempt"); 387 remove_from_all(task);
648 for (last = lowest_prio_cpu_d();
649 mc_edf_preemption_needed(dom, CRIT_LEVEL_D,
650 last);
651 last = lowest_prio_cpu_d()) {
652 prepare_preemption(dom, last, CRIT_LEVEL_D);
653 }
654}
655 388
656static noinline void check_for_a_preemption(rt_domain_t *dom, cpu_entry_t *cpu) { 389 /* If it's not a ghost job, do ghost job conversion */
657 TRACE("Checking for a preempt"); 390 if (!is_ghost(task)) {
658 if (mc_edf_preemption_needed(dom, CRIT_LEVEL_A, cpu)) { 391 tsk_mc_data(task)->mc_job.ghost_budget = budget_remaining(task);
659 prepare_preemption(dom, cpu, CRIT_LEVEL_A); 392 tsk_mc_data(task)->mc_job.is_ghost = 1;
660 } 393 }
661}
662 394
663static noinline void check_for_b_preemption(rt_domain_t *dom, cpu_entry_t *cpu) { 395 /* If the task is a ghost job with no budget, it either exhausted
664 TRACE("Checking for b preempt"); 396 * its ghost budget or there was no ghost budget after the job
665 if (mc_edf_preemption_needed(dom, CRIT_LEVEL_B, cpu)) { 397 * conversion. Revert back to a normal task and complete the period.
666 prepare_preemption(dom, cpu, CRIT_LEVEL_B); 398 */
399 if (tsk_mc_data(task)->mc_job.ghost_budget == 0) {
400 tsk_mc_data(task)->mc_job.is_ghost = 0;
401 prepare_for_next_period(task);
402 if (is_released(task, litmus_clock())) {
403 sched_trace_task_release(task);
404 }
667 } 405 }
406
407 /* Requeue non-blocking tasks */
408 if (is_running(task))
409 job_arrival(task);
668} 410}
669 411
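The conversion in job_completion() above can be read as: a job that signals completion with budget left over keeps that remainder as a ghost budget and re-enters the system as a ghost job; once the ghost budget reaches zero (immediately, or later via the budget timer), it reverts to a normal task and is prepared for its next period. A rough user-space sketch of just that state bookkeeping, with illustrative names and numbers (not the kernel code path):

struct mc_job_sketch {
        int                is_ghost;
        unsigned long long ghost_budget;        /* ns of unused execution budget */
};

static void complete_job_sketch(struct mc_job_sketch *job,
                                unsigned long long remaining_ns)
{
        if (!job->is_ghost) {
                /* e.g. provisioned 10 ms, finished after 6 ms: 4 ms remain */
                job->ghost_budget = remaining_ns;
                job->is_ghost = 1;
        }
        if (job->ghost_budget == 0) {
                /* Budget already exhausted (or none was left): revert to a
                 * normal job and set up the next period, as the real code
                 * does with prepare_for_next_period().
                 */
                job->is_ghost = 0;
        }
}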
670/* mc_job_arrival: task is either resumed or released 412/*
671 * Called from job_completion(), mc_task_new(), and mc_task_wake_up(), all 413 * Return true if the domain has a higher priority ready task. The curr
672 * of which have the global lock 414 * task must belong to the domain.
673 * Requeues task and checks for/triggers preemptions
674 */ 415 */
675static noinline void mc_job_arrival(struct task_struct* task) 416static noinline int mc_preempt_needed(domain_t *dom, struct task_struct* curr)
676{ 417{
677 enum crit_level task_crit_level; 418 struct task_struct *next = dom->peek_ready(dom);
678 BUG_ON(!task);
679 419
680 TRACE("mc_job_arrival triggered\n"); 420 if (!next || !curr) {
681 task_crit_level = tsk_mc_crit(task); 421 return next && !curr;
682 requeue(task); 422 } else {
683 if (task_crit_level == CRIT_LEVEL_A) { 423 BUG_ON(tsk_mc_crit(next) != tsk_mc_crit(curr));
684 check_for_a_preemption(remote_a_queue(get_partition(task)), 424 return get_task_domain(next)->higher_prio(next, curr);
685 remote_cpu_entry(get_partition(task)));
686 } else if (task_crit_level == CRIT_LEVEL_B) {
687 check_for_b_preemption(remote_b_queue(get_partition(task)),
688 remote_cpu_entry(get_partition(task)));
689 } else if (task_crit_level == CRIT_LEVEL_C) {
690 check_for_c_preemptions(&crit_c);
691 } else if (task_crit_level == CRIT_LEVEL_D) {
692 check_for_d_preemptions(&crit_d);
693 } 425 }
694} 426}
695 427
696/* Called by the domain 428/*
697 * Obtains global lock, merges ready tasks, checks for/triggers preemptions, 429 * Completes a logically (but not physically) running ghost task.
698 * and releases global lock 430 */
699*/ 431static enum hrtimer_restart mc_ghost_exhausted(struct hrtimer *timer)
700static void mc_release_jobs(rt_domain_t* rt, struct bheap* tasks)
701{ 432{
702 unsigned long flags; 433 unsigned long flags;
703 int i; 434 crit_cpu_entry_t *ce;
704 435
705 raw_spin_lock_irqsave(&global_lock, flags); 436 raw_spin_lock_irqsave(&global_lock, flags);
706 TRACE("mc_release_jobs triggered\n");
707 437
708 __merge_ready(rt, tasks); 438 ce = container_of(timer, crit_cpu_entry_t, timer);
439 TRACE_CRIT_ENTRY(ce, "Ghost exhausted firing");
709 440
710 for (i = 0; i < NR_CPUS; i++) { 441 /* Due to race conditions, we cannot just set the linked
711 if (rt == remote_b_queue(i)) { 442 * task's budget to 0 as it may no longer be the task
712 check_for_b_preemption(rt, remote_cpu_entry(i)); 443 * for which this timer was armed.
713 } 444 */
714 else if (rt == remote_a_queue(i)) { 445 if (ce->linked && is_ghost(ce->linked)) {
715 check_for_a_preemption(rt, remote_cpu_entry(i)); 446 update_ghost_time(ce->linked);
447 if (tsk_mc_data(ce->linked)->mc_job.ghost_budget == 0) {
448 job_completion(ce->linked, 0);
449 goto out;
716 } 450 }
717 } 451 }
718 if (rt == &crit_c) {
719 check_for_c_preemptions(rt);
720 }
721 if (rt == &crit_d) {
722 check_for_d_preemptions(rt);
723 }
724 452
453 TRACE_TASK(ce->linked, "Was not exhausted");
454 out:
725 raw_spin_unlock_irqrestore(&global_lock, flags); 455 raw_spin_unlock_irqrestore(&global_lock, flags);
456 return HRTIMER_NORESTART;
726} 457}
727 458
728/* caller holds global_lock 459/*
729 * Called only by mc_schedule() which holds global lock 460 * Adds released jobs to a domain and causes a preemption, if necessary.
730 * Prepares task for next period, unlinks it, and calls mc_job_arrival 461 */
731 * Converts jobs to ghost jobs as necessary, or finishes end of ghost jobs. 462static void mc_release_jobs(rt_domain_t* rt, struct bheap* tasks)
732*/
733static noinline void job_completion(struct task_struct *t, int forced)
734{ 463{
735 cpu_entry_t *cpu; 464 unsigned long flags;
736 BUG_ON(!t); 465 struct task_struct *first;
737 466
738 sched_trace_task_completion(t, forced); 467 raw_spin_lock_irqsave(&global_lock, flags);
739 468
740 TRACE_TASK(t, "job_completion().\n"); 469 first = bheap_peek(rt->order, tasks)->value;
470 TRACE_TASK(first, "Jobs released");
741 471
742 /* set flags */ 472 __merge_ready(rt, tasks);
743 set_rt_flags(t, RT_F_SLEEP); 473 check_for_preempt(get_task_domain(first));
744 /* If it's not a ghost job, do ghost job conversion and return if
745 * needed.
746 */
747 if (!is_ghost(t)) {
748 TRACE_TASK(t, "Converting to ghost from %d.\n", t->rt_param.scheduled_on);
749 cpu = remote_cpu_entry(t->rt_param.scheduled_on);
750 /*Unlink first while it's not a ghost job.*/
751 unlink(t);
752 tsk_mc_data(t)->mc_job.ghost_budget = budget_remaining(t);
753 tsk_mc_data(t)->mc_job.is_ghost = 1;
754
755 /* If we did just convert the job to ghost, we can safely
756 * reschedule it and then let schedule() determine a new
757 * job to run in the slack.
758 *
759 * If it actually needs to run as a ghost job, we'll do so
760 * here.
761 *
762 * If it doesn't need to, it will fall through and be handled
763 * properly as well.
764 */
765 if (tsk_mc_data(t)->mc_job.ghost_budget > 0) {
766 link_task_to_cpu(t, cpu);
767 preempt(cpu);
768 return;
769 }
770 }
771 /* prepare for next period - we either just became ghost but with no
772 * budget left, or we were already ghost and the ghost job expired*/
773 if (is_ghost(t)) {
774 tsk_mc_data(t)->mc_job.ghost_budget = 0;
775 /*Need to unlink here so prepare_for_next_period doesn't try
776 * to unlink us
777 */
778 unlink(t);
779 tsk_mc_data(t)->mc_job.is_ghost = 0;
780 tsk_mc_data(t)->mc_job.ghost_budget = 0;
781 prepare_for_next_period(t);
782 }
783 if (is_released(t, litmus_clock()))
784 sched_trace_task_release(t);
785 /* requeue
786 * But don't requeue a blocking task. */
787 if (is_running(t))
788 mc_job_arrival(t);
789}
790
791/* watchdog_timeout - this function is called when a watchdog timer expires.
792 *
793 * Acquires global lock
794 */
795 474
796static enum hrtimer_restart watchdog_timeout(struct hrtimer *timer)
797{
798 struct watchdog_timer* wt = container_of(timer,
799 struct watchdog_timer,
800 timer);
801 unsigned long flags;
802 struct task_struct* task = wt->task;
803 raw_spin_lock_irqsave(&global_lock, flags);
804 /*If we have triggered, we know the budget must have expired.*/
805 /*This needs to run first, so it doesn't look to job_completion like
806 * we have an active timer.
807 */
808 wt->task = NULL;
809 tsk_mc_data(task)->mc_job.ghost_budget = 0;
810 job_completion(task, 0);
811 TRACE_TASK(task, "Watchdog timeout\n");
812 raw_spin_unlock_irqrestore(&global_lock, flags); 475 raw_spin_unlock_irqrestore(&global_lock, flags);
813 return HRTIMER_NORESTART;
814} 476}
815 477
816 478/*
817/* mc_tick - this function is called for every local timer 479 * Ghost time accounting.
818 * interrupt. 480 * TODO: remove
819 *
820 * checks whether the current task has expired and checks
821 * whether we need to preempt it if it has not expired
822 * Called from LITMUS core
823 * Locks when calling update_ghost_time(t)
824 * Just sets reschedule flags on task and CPU and request_exit_np flag on task
825 */ 481 */
826static void mc_tick(struct task_struct* t) 482static void mc_tick(struct task_struct* t)
827{ 483{
828 unsigned long flags; 484 unsigned long flags;
829 if (is_ghost(t)) { 485 if (is_realtime(t) && is_ghost(t)) {
830 raw_spin_lock_irqsave(&global_lock, flags); 486 raw_spin_lock_irqsave(&global_lock, flags);
831 update_ghost_time(t); 487 update_ghost_time(t);
832 raw_spin_unlock_irqrestore(&global_lock, flags); 488 raw_spin_unlock_irqrestore(&global_lock, flags);
833 } 489 }
834 if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) {
835 if (!is_np(t)) {
836 /* np tasks will be preempted when they become
837 * preemptable again
838 */
839 set_tsk_need_resched(t);
840 set_will_schedule();
841 TRACE("mc_scheduler_tick: "
842 "%d is preemptable "
843 " => FORCE_RESCHED\n", t->pid);
844 } else if (is_user_np(t)) {
845 TRACE("mc_scheduler_tick: "
846 "%d is non-preemptable, "
847 "preemption delayed.\n", t->pid);
848 request_exit_np(t);
849 }
850 }
851}
852
853/* Getting schedule() right is a bit tricky. schedule() may not make any
854 * assumptions on the state of the current task since it may be called for a
855 * number of reasons. The reasons include a scheduler_tick() determined that it
856 * was necessary, because sys_exit_np() was called, because some Linux
857 * subsystem determined so, or even (in the worst case) because there is a bug
858 * hidden somewhere. Thus, we must take extreme care to determine what the
859 * current state is.
860 *
861 * The CPU could currently be scheduling a task (or not), be linked (or not).
862 *
863 * The following assertions for the scheduled task could hold:
864 *
865 * - !is_running(scheduled) // the job blocks
866 * - scheduled->timeslice == 0 // the job completed (forcefully)
867 * - get_rt_flag() == RT_F_SLEEP // the job completed (by syscall)
868 * - linked != scheduled // we need to reschedule (for any reason)
869 * - is_np(scheduled) // rescheduling must be delayed,
870 * sys_exit_np must be requested
871 *
872 * Any of these can occur together.
873 *
874 *
875 * Called by LITMUS core
876 * No lock required by caller
877 * Obtains global lock
878 * can call unlink(), request_exit_np(), job_completion(), __take_ready()
879 * modifies next, scheduled->scheduled_on, linked->scheduled_on
880 * Releases global lock
881 */
882static struct task_struct* mc_schedule(struct task_struct * prev)
883{
884 cpu_entry_t* entry = &__get_cpu_var(mc_cpu_entries);
885 int out_of_time, sleep, preempt, np, exists, blocks;
886 struct task_struct* next = NULL;
887 struct task_struct* ready_task = NULL;
888 enum crit_level ready_crit;
889 int i;
890
891#ifdef CONFIG_RELEASE_MASTER
892 /* Bail out early if we are the release master.
893 * The release master never schedules any real-time tasks.
894 */
895 if (mc_release_master == entry->cpu) {
896 sched_state_task_picked();
897 return NULL;
898 }
899#endif
900
901 raw_spin_lock(&global_lock);
902 clear_will_schedule();
903
904 /* sanity checking */
905 BUG_ON(entry->scheduled && entry->scheduled != prev);
906 BUG_ON(entry->scheduled && !is_realtime(prev));
907 BUG_ON(is_realtime(prev) && !entry->scheduled);
908
909 /* (0) Determine state */
910 exists = entry->scheduled != NULL;
911 blocks = exists && !is_running(entry->scheduled);
912 out_of_time = exists && budget_enforced(entry->scheduled) &&
913 budget_exhausted(entry->scheduled);
914 np = exists && is_np(entry->scheduled);
915 sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP;
916 preempt = entry->scheduled != entry->linked;
917
918#ifdef WANT_ALL_SCHED_EVENTS
919 TRACE_TASK(prev, "invoked mc_schedule.\n");
920#endif
921
922 if (exists)
923 TRACE_TASK(prev,
924 "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d "
925 "state:%d sig:%d\n",
926 blocks, out_of_time, np, sleep, preempt,
927 prev->state, signal_pending(prev));
928 if (entry->linked && preempt)
929 TRACE_TASK(prev, "will be preempted by %s/%d\n",
930 entry->linked->comm, entry->linked->pid);
931
932
933 /* If a task blocks we have no choice but to reschedule.
934 */
935 if (blocks)
936 unlink(entry->scheduled);
937
938 /* Request a sys_exit_np() call if we would like to preempt but cannot.
939 * We need to make sure to update the link structure anyway in case
940 * that we are still linked. Multiple calls to request_exit_np() don't
941 * hurt.
942 */
943 if (np && (out_of_time || preempt || sleep)) {
944 unlink(entry->scheduled);
945 request_exit_np(entry->scheduled);
946 }
947
948 /* Any task that is preemptable and either exhausts its execution
949 * budget or wants to sleep completes. We may have to reschedule after
950 * this. Don't do a job completion if we block (can't have timers running
951 * for blocked jobs). Preemption go first for the same reason.
952 */
953 if (!np && (out_of_time || sleep) && !blocks && !preempt)
954 job_completion(entry->scheduled, !sleep);
955
956 /* Link pending task if we became unlinked.
957 */
958 if (!entry->linked) {
959 if (!entry->ghost_tasks[CRIT_LEVEL_A]) {
960 ready_task = __take_ready(local_a_queue);
961 ready_crit = CRIT_LEVEL_A;
962 if (ready_task && is_ghost(ready_task)) {
963 link_task_to_cpu(ready_task, entry);
964 ready_task = NULL;
965 }
966 }
967 if (!ready_task && !entry->ghost_tasks[CRIT_LEVEL_B]) {
968 ready_task = __take_ready(local_b_queue);
969 ready_crit = CRIT_LEVEL_B;
970 if (ready_task && is_ghost(ready_task)) {
971 link_task_to_cpu(ready_task, entry);
972 ready_task = NULL;
973 }
974 }
975 if (!ready_task && !entry->ghost_tasks[CRIT_LEVEL_C]) {
976 ready_task = __take_ready(&crit_c);
977 ready_crit = CRIT_LEVEL_C;
978 if (ready_task && is_ghost(ready_task)) {
979 link_task_to_cpu(ready_task, entry);
980 ready_task = NULL;
981 }
982 }
983 if (!ready_task && !entry->ghost_tasks[CRIT_LEVEL_D]) {
984 ready_task = __take_ready(&crit_d);
985 ready_crit = CRIT_LEVEL_D;
986 if (ready_task && is_ghost(ready_task)) {
987 link_task_to_cpu(ready_task, entry);
988 ready_task = NULL;
989 }
990 }
991 if (!ready_task) {
992 /* set to something invalid? */
993 ready_crit = NUM_CRIT_LEVELS;
994 }
995 for (i = ready_crit; i < NUM_CRIT_LEVELS; i++) {
996 if (entry->ghost_tasks[i])
997 requeue(entry->ghost_tasks[i]);
998 }
999 link_task_to_cpu(ready_task, entry);
1000 if (ready_task)
1001 TRACE_TASK(ready_task,
1002 "Linked task inside scheduler\n");
1003 }
1004
1005 /* The final scheduling decision. Do we need to switch for some reason?
1006 * If linked is different from scheduled, then select linked as next.
1007 */
1008 if ((!np || blocks) &&
1009 entry->linked != entry->scheduled) {
1010 /* Schedule a linked job? */
1011 if (entry->linked) {
1012 entry->linked->rt_param.scheduled_on = entry->cpu;
1013 next = entry->linked;
1014 }
1015 if (entry->scheduled) {
1016 /* not gonna be scheduled soon */
1017 entry->scheduled->rt_param.scheduled_on = NO_CPU;
1018 TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
1019 }
1020 } else
1021 /* Only override Linux scheduler if we have a real-time task
1022 * scheduled that needs to continue.
1023 */
1024 if (exists)
1025 next = prev;
1026
1027 sched_state_task_picked();
1028
1029 /*TODO: Update name of locking, reflect that we're locking all queues*/
1030 raw_spin_unlock(&global_lock);
1031
1032#ifdef WANT_ALL_SCHED_EVENTS
1033 TRACE("global_lock released, next=0x%p\n", next);
1034
1035 if (next)
1036 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
1037 else if (exists && !next)
1038 TRACE("becomes idle at %llu.\n", litmus_clock());
1039#endif
1040
1041
1042 return next;
1043}
1044
1045
1046/* _finish_switch - we just finished the switch away from prev
1047 * Called by LITMUS core
1048 * No locks
1049 */
1050static void mc_finish_switch(struct task_struct *prev)
1051{
1052 cpu_entry_t* entry = &__get_cpu_var(mc_cpu_entries);
1053
1054 entry->scheduled = is_realtime(current) ? current : NULL;
1055#ifdef WANT_ALL_SCHED_EVENTS
1056 TRACE_TASK(prev, "switched away from\n");
1057#endif
1058} 490}
1059 491
1060 492/*
1061/* Prepare a task for running in RT mode 493 * Setup new mixed-criticality task.
1062 * Called by LITMUS core
1063 * No lock required by caller
1064 * Obtains lock and calls mc_job_arrival before releasing lock
1065 */ 494 */
1066static void mc_task_new(struct task_struct *t, int on_rq, int running) 495static void mc_task_new(struct task_struct *t, int on_rq, int running)
1067{ 496{
1068 unsigned long flags; 497 unsigned long flags;
1069 cpu_entry_t* entry; 498 cpu_entry_t* entry;
1070 enum crit_level lvl; 499 enum crit_level level;
1071 500
1072 TRACE("mixed crit: task new %d\n", t->pid); 501 TRACE("New mixed criticality task %d\n", t->pid);
1073 502
1074 raw_spin_lock_irqsave(&global_lock, flags); 503 raw_spin_lock_irqsave(&global_lock, flags);
1075 504
1076 lvl = tsk_mc_crit(t); 505 /* Assign domain */
1077 t->rt_param.domain = 506 level = tsk_mc_crit(t);
1078 (lvl == CRIT_LEVEL_A) ? remote_a_queue(get_partition(t)) : 507 if (level < CRIT_LEVEL_C) {
1079 (lvl == CRIT_LEVEL_B) ? remote_b_queue(get_partition(t)) : 508 entry = cpus[get_partition(t)];
1080 (lvl == CRIT_LEVEL_C) ? &crit_c : &crit_d; 509 } else {
510 entry = cpus[task_cpu(t)];
511 }
512 level = tsk_mc_crit(t);
513 t->rt_param._domain = entry->crit_entries[level].domain;
1081 514
1082 /* setup job params */ 515 /* Setup job params */
1083 release_at(t, litmus_clock()); 516 release_at(t, litmus_clock());
1084 tsk_mc_data(t)->mc_job.ghost_budget = 0; 517 tsk_mc_data(t)->mc_job.ghost_budget = 0;
1085 tsk_mc_data(t)->mc_job.is_ghost = 0; 518 tsk_mc_data(t)->mc_job.is_ghost = 0;
1086 519
1087 if (running) { 520 if (running) {
1088 entry = &per_cpu(mc_cpu_entries, task_cpu(t));
1089 BUG_ON(entry->scheduled); 521 BUG_ON(entry->scheduled);
1090 522 entry->scheduled = t;
1091#ifdef CONFIG_RELEASE_MASTER 523 tsk_rt(t)->scheduled_on = entry->cpu;
1092 if (entry->cpu != mc_release_master) {
1093#endif
1094 entry->scheduled = t;
1095 tsk_rt(t)->scheduled_on = task_cpu(t);
1096#ifdef CONFIG_RELEASE_MASTER
1097 } else {
1098 /* do not schedule on release master */
1099 preempt(entry); /* force resched */
1100 tsk_rt(t)->scheduled_on = NO_CPU;
1101 }
1102#endif
1103 } else { 524 } else {
1104 t->rt_param.scheduled_on = NO_CPU; 525 t->rt_param.scheduled_on = NO_CPU;
1105 } 526 }
1106 t->rt_param.linked_on = NO_CPU; 527 t->rt_param.linked_on = NO_CPU;
528
529 job_arrival(t);
1107 530
1108 mc_job_arrival(t);
1109 raw_spin_unlock_irqrestore(&global_lock, flags); 531 raw_spin_unlock_irqrestore(&global_lock, flags);
1110} 532}
1111 533
1112/* Called by LITMUS core 534/*
1113 * No lock required by caller 535 * Add task back into its domain and cause any necessary preemptions.
1114 * Obtains lock and calls mc_job_arrival before releasing lock
1115 */ 536 */
1116static void mc_task_wake_up(struct task_struct *task) 537static void mc_task_wake_up(struct task_struct *task)
1117{ 538{
1118 unsigned long flags; 539 unsigned long flags;
1119 lt_t now; 540 lt_t now;
1120 541
1121 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
1122
1123 raw_spin_lock_irqsave(&global_lock, flags); 542 raw_spin_lock_irqsave(&global_lock, flags);
543 TRACE_TASK(task, "Wakes up");
544
1124 /* We need to take suspensions because of semaphores into 545 /* We need to take suspensions because of semaphores into
1125 * account! If a job resumes after being suspended due to acquiring 546 * account! If a job resumes after being suspended due to acquiring
1126 * a semaphore, it should never be treated as a new job release. 547 * a semaphore, it should never be treated as a new job release.
@@ -1130,114 +551,167 @@ static void mc_task_wake_up(struct task_struct *task)
1130 } else { 551 } else {
1131 now = litmus_clock(); 552 now = litmus_clock();
1132 if (is_tardy(task, now)) { 553 if (is_tardy(task, now)) {
1133 /* new sporadic release */ 554 /* New sporadic release */
1134 release_at(task, now); 555 release_at(task, now);
1135 sched_trace_task_release(task); 556 sched_trace_task_release(task);
1136 } 557 } else {
1137 else {
1138 if (task->rt.time_slice) { 558 if (task->rt.time_slice) {
1139 /* came back in time before deadline 559 /* Came back in time before deadline */
1140 */
1141 set_rt_flags(task, RT_F_RUNNING); 560 set_rt_flags(task, RT_F_RUNNING);
1142 } 561 }
1143 } 562 }
1144 } 563 }
1145 /*Delay job arrival if we still have an active ghost job*/ 564
1146 if (!is_ghost(task)) 565 if (!is_ghost(task))
1147 mc_job_arrival(task); 566 job_arrival(task);
567
1148 raw_spin_unlock_irqrestore(&global_lock, flags); 568 raw_spin_unlock_irqrestore(&global_lock, flags);
1149} 569}
1150 570
1151/* Called by LITMUS core 571/*
1152 * No lock required by caller 572 * Remove task from global state to prevent it from being linked / run
1153 * Obtains and releases global lock 573 * on any CPU.
1154 */ 574 */
1155static void mc_task_block(struct task_struct *t) 575static void mc_task_block(struct task_struct *task)
1156{ 576{
1157 unsigned long flags; 577 unsigned long flags;
578 raw_spin_lock_irqsave(&global_lock, flags);
579 TRACE_TASK(task, "Block at %llu", litmus_clock());
1158 580
1159 TRACE_TASK(t, "block at %llu\n", litmus_clock()); 581 remove_from_all(task);
1160 582
1161 /* unlink if necessary */
1162 raw_spin_lock_irqsave(&global_lock, flags);
1163 unlink(t);
1164 raw_spin_unlock_irqrestore(&global_lock, flags); 583 raw_spin_unlock_irqrestore(&global_lock, flags);
1165
1166 BUG_ON(!is_realtime(t));
1167} 584}
1168 585
1169 586/*
1170/* Called by LITMUS core 587 * Remove task from the system.
1171 * No lock required by caller
1172 * Obtains and releases global lock
1173 */ 588 */
1174static void mc_task_exit(struct task_struct * t) 589static void mc_task_exit(struct task_struct *task)
1175{ 590{
1176 unsigned long flags; 591 unsigned long flags;
1177 592
1178 /* unlink if necessary */ 593 BUG_ON(!is_realtime(task));
594 TRACE_TASK(task, "RIP");
595
1179 raw_spin_lock_irqsave(&global_lock, flags); 596 raw_spin_lock_irqsave(&global_lock, flags);
1180 unlink(t); 597 remove_from_all(task);
1181 if (tsk_rt(t)->scheduled_on != NO_CPU) { 598 if (tsk_rt(task)->scheduled_on != NO_CPU) {
1182 mc_cpus[tsk_rt(t)->scheduled_on]->scheduled = NULL; 599 cpus[tsk_rt(task)->scheduled_on]->scheduled = NULL;
1183 tsk_rt(t)->scheduled_on = NO_CPU; 600 tsk_rt(task)->scheduled_on = NO_CPU;
1184 } 601 }
1185 raw_spin_unlock_irqrestore(&global_lock, flags); 602 raw_spin_unlock_irqrestore(&global_lock, flags);
1186
1187 BUG_ON(!is_realtime(t));
1188 TRACE_TASK(t, "RIP\n");
1189} 603}
1190 604
1191static long mc_admit_task(struct task_struct* tsk) 605/*
606 * Return true if the task is a valid mixed-criticality task.
607 */
608static long mc_admit_task(struct task_struct* task)
1192{ 609{
1193 if (!tsk_mc_data(tsk)) 610 if (!tsk_mc_data(task)) {
1194 { 611 printk(KERN_WARNING "Tried to admit task with no criticality "
1195 printk(KERN_WARNING "tried to admit task with no criticality "
1196 "level\n"); 612 "level\n");
1197 return -EINVAL; 613 return -EINVAL;
1198 } 614 }
1199 printk(KERN_INFO "admitted task with criticality level %d\n", 615 if (tsk_mc_crit(task) < CRIT_LEVEL_C && get_partition(task) == NO_CPU) {
1200 tsk_mc_crit(tsk)); 616 printk(KERN_WARNING "Tried to admit partitioned task with no "
617 "partition\n");
618 return -EINVAL;
619 }
620 printk(KERN_INFO "Admitted task with criticality level %d\n",
621 tsk_mc_crit(task));
1201 return 0; 622 return 0;
1202} 623}
1203 624
1204static long mc_activate_plugin(void) 625/*
626 * Return next task which should be scheduled.
627 */
628static struct task_struct* mc_schedule(struct task_struct * prev)
1205{ 629{
1206 int cpu; 630 domain_t *dom;
1207 cpu_entry_t *entry; 631 crit_cpu_entry_t *ce;
632 cpu_entry_t* entry = cpus[smp_processor_id()];
633 int i, out_of_time, sleep, preempt, exists, blocks, global;
634 struct task_struct *dtask = NULL, *ready_task = NULL, *next = NULL;
1208 635
1209 bheap_init(&mc_cpu_heap_c); 636 raw_spin_lock(&global_lock);
1210 bheap_init(&mc_cpu_heap_d);
1211#ifdef CONFIG_RELEASE_MASTER
1212 crit_c.release_master = atomic_read(&release_master_cpu);
1213 crit_d.release_master = crit_c.release_master;
1214#endif
1215 637
1216 for_each_online_cpu(cpu) { 638 /* Sanity checking */
1217 entry = &per_cpu(mc_cpu_entries, cpu); 639 BUG_ON(entry->scheduled && entry->scheduled != prev);
1218 bheap_node_init(&entry->hn_c, entry); 640 BUG_ON(entry->scheduled && !is_realtime(prev));
1219 bheap_node_init(&entry->hn_d, entry); 641 BUG_ON(is_realtime(prev) && !entry->scheduled);
1220 atomic_set(&entry->will_schedule, 0); 642
1221 entry->linked = NULL; 643 /* Determine state */
1222 entry->scheduled = NULL; 644 exists = entry->scheduled != NULL;
1223#ifdef CONFIG_RELEASE_MASTER 645 blocks = exists && !is_running(entry->scheduled);
1224 if (cpu != mc_release_master) { 646 out_of_time = exists && budget_enforced(entry->scheduled) &&
1225#endif 647 budget_exhausted(entry->scheduled);
1226 TRACE("MC: Initializing CPU #%d.\n", cpu); 648 sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP;
1227 update_cpu_position(entry); 649 global = exists && is_global_task(entry->scheduled);
1228#ifdef CONFIG_RELEASE_MASTER 650 preempt = entry->scheduled != entry->linked;
1229 } else { 651
1230 TRACE("MC: CPU %d is release master.\n", cpu); 652 if (exists) {
653 entry->scheduled->rt_param.scheduled_on = NO_CPU;
654 TRACE_TASK(prev,
655 "blocks:%d out_of_time:%d sleep:%d preempt:%d "
656 "state:%d sig:%d global:%d",
657 blocks, out_of_time, sleep, preempt,
658 prev->state, signal_pending(prev), global);
659 }
660
661 /* If a task blocks we have no choice but to reschedule */
662 if (blocks)
663 remove_from_all(entry->scheduled);
664 /* Any task which exhausts its budget or sleeps waiting for its next
665 * period completes unless its execution has been forcibly stopped.
666 */
667 if ((out_of_time || sleep) && !blocks && !preempt)
668 job_completion(entry->scheduled, !sleep);
669 /* Global scheduled tasks must wait for a deschedule before they
670 * can rejoin a global domain. See comment in job_arrival.
671 */
672 else if (global && preempt && !blocks)
673 job_arrival(entry->scheduled);
674
675 /* Pick next task if none is linked */
676 if (!entry->linked) {
677 for (i = 0; i < NUM_CRIT_LEVELS && !ready_task; i++) {
678 ce = &entry->crit_entries[i];
679 dom = ce->domain;
680 dtask = dom->peek_ready(dom);
681 if (!ce->linked && dtask) {
682 dom->take_ready(dom);
683 link_task_to_crit(ce, dtask);
684 ready_task = (is_ghost(dtask)) ? NULL : dtask;
685 }
1231 } 686 }
1232#endif 687 if (ready_task)
688 link_task_to_cpu(entry, ready_task);
1233 } 689 }
1234 return 0; 690
691 /* Schedule next task */
692 next = entry->linked;
693 entry->scheduled = next;
694 if (entry->scheduled)
695 entry->scheduled->rt_param.scheduled_on = entry->cpu;
696
697 sched_state_task_picked();
698
699 raw_spin_unlock(&global_lock);
700
701 if (next)
702 TRACE_TASK(next, "Scheduled at %llu", litmus_clock());
703 else if (exists && !next)
704 TRACE("Becomes idle at %llu\n", litmus_clock());
705
706 return next;
1235} 707}
1236 708
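When nothing is linked, mc_schedule() walks the criticality levels from A (highest) downward, links a ready task at each unlinked level it visits, and hands a task to the CPU only if it is not a ghost job (a ghost job is tracked at its criticality level but not actually run). The selection loop can be sketched in isolation as below; struct task, struct domain, and struct crit_entry are simplified stand-ins for the real LITMUS types, and the stub domain always offers a single non-ghost task.

#include <stdio.h>

enum crit_level { CRIT_LEVEL_A, CRIT_LEVEL_B, CRIT_LEVEL_C,
                  CRIT_LEVEL_D, NUM_CRIT_LEVELS };

struct task { int is_ghost; };

/* Stand-in for domain_t: peek at / remove the highest-priority ready task. */
struct domain {
        struct task *(*peek_ready)(struct domain *);
        void         (*take_ready)(struct domain *);
};

/* Stand-in for crit_cpu_entry_t. */
struct crit_entry {
        struct task   *linked;
        struct domain *domain;
};

/* Mirrors the loop in mc_schedule(): scan levels from highest to lowest,
 * link the first ready task found at an unlinked level, and stop once a
 * non-ghost task can actually be scheduled on the CPU. */
static struct task *pick_ready_task(struct crit_entry entries[NUM_CRIT_LEVELS])
{
        struct task *ready = NULL, *dtask;
        int i;

        for (i = 0; i < NUM_CRIT_LEVELS && !ready; i++) {
                struct crit_entry *ce = &entries[i];
                dtask = ce->domain->peek_ready(ce->domain);
                if (!ce->linked && dtask) {
                        ce->domain->take_ready(ce->domain);
                        ce->linked = dtask;
                        ready = dtask->is_ghost ? NULL : dtask;
                }
        }
        return ready;
}

static struct task the_task; /* one non-ghost ready task */

static struct task *stub_peek(struct domain *d) { (void)d; return &the_task; }
static void stub_take(struct domain *d) { (void)d; }

int main(void)
{
        struct domain dom = { stub_peek, stub_take };
        struct crit_entry entries[NUM_CRIT_LEVELS] = {
                { NULL, &dom }, { NULL, &dom }, { NULL, &dom }, { NULL, &dom },
        };
        struct task *picked = pick_ready_task(entries);

        printf("picked %s task\n", picked ? "a real" : "no");
        return 0;
}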
1237/* Plugin object */ 709/* **************************************************************************
710 * Initialization
711 * ************************************************************************** */
712
1238static struct sched_plugin mc_plugin __cacheline_aligned_in_smp = { 713static struct sched_plugin mc_plugin __cacheline_aligned_in_smp = {
1239 .plugin_name = "MC", 714 .plugin_name = "MC",
1240 .finish_switch = mc_finish_switch,
1241 .tick = mc_tick, 715 .tick = mc_tick,
1242 .task_new = mc_task_new, 716 .task_new = mc_task_new,
1243 .complete_job = complete_job, 717 .complete_job = complete_job,
@@ -1246,48 +720,112 @@ static struct sched_plugin mc_plugin __cacheline_aligned_in_smp = {
1246 .task_wake_up = mc_task_wake_up, 720 .task_wake_up = mc_task_wake_up,
1247 .task_block = mc_task_block, 721 .task_block = mc_task_block,
1248 .admit_task = mc_admit_task, 722 .admit_task = mc_admit_task,
1249 .activate_plugin = mc_activate_plugin,
1250}; 723};
1251 724
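For comparison, the overall shape of a LITMUS plugin registration is small: fill in a struct sched_plugin with the callbacks the plugin provides and pass it to register_sched_plugin(), just as mc_plugin and init_mc() do in this file. The stripped-down sketch below uses only callbacks that appear here; the demo_* names are hypothetical, and it assumes callbacks left unset fall back to the core's defaults and that sched_state_task_picked() is declared in <litmus/preempt.h>.

#include <linux/module.h>
#include <litmus/litmus.h>
#include <litmus/sched_plugin.h>
#include <litmus/preempt.h>

/* Trivial schedule callback: never picks a real-time task. */
static struct task_struct* demo_schedule(struct task_struct *prev)
{
        sched_state_task_picked();
        return NULL;
}

/* Accept every real-time task unconditionally. */
static long demo_admit_task(struct task_struct *task)
{
        return 0;
}

static struct sched_plugin demo_plugin __cacheline_aligned_in_smp = {
        .plugin_name = "DEMO",
        .schedule    = demo_schedule,
        .admit_task  = demo_admit_task,
};

static int __init init_demo(void)
{
        return register_sched_plugin(&demo_plugin);
}

module_init(init_demo);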
725/* Initialize values here so that they are allocated with the module
726 * and destroyed when the module is unloaded.
727 */
728DEFINE_PER_CPU(cpu_entry_t, _mc_cpus);
729DEFINE_PER_CPU(domain_data_t, _mc_crit_a);
730DEFINE_PER_CPU(domain_data_t, _mc_crit_b);
731static domain_data_t _mc_crit_c, _mc_crit_d;
732struct bheap _mc_heap_c, _mc_heap_d;
733struct bheap_node _mc_nodes_c[NR_CPUS], _mc_nodes_d[NR_CPUS];
734
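These DEFINE_PER_CPU and static definitions mean each CPU's state and every domain are allocated along with the module image and go away when it is unloaded, with no kmalloc()/kfree() at plugin-switch time. The generic kernel pattern, independent of this scheduler, looks like the sketch below; struct demo_state and demo_states are hypothetical names used only for illustration.

#include <linux/kernel.h>
#include <linux/percpu.h>
#include <linux/cpumask.h>

/* One copy of this state exists per CPU, allocated statically with the
 * module image rather than dynamically at activation time. */
struct demo_state {
        int cpu;
        void *linked;
};

DEFINE_PER_CPU(struct demo_state, demo_states);

static void demo_init_states(void)
{
        int cpu;
        struct demo_state *st;

        for_each_online_cpu(cpu) {
                /* per_cpu() resolves CPU cpu's copy of the variable. */
                st = &per_cpu(demo_states, cpu);
                st->cpu = cpu;
                st->linked = NULL;
        }
}

This is the same access pattern init_mc() below relies on when it takes &per_cpu(_mc_cpus, cpu) and the per-CPU level-A/B domain data.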
735static void init_crit_entry(crit_cpu_entry_t *ce, enum crit_level level,
736 domain_data_t *dom_data,
737 struct bheap_node *node)
738{
739 ce->level = level;
740 ce->linked = NULL;
741 ce->node = node;
742 ce->domain = &dom_data->domain;
1252 743
1253static int __init init_mc(void) 744 hrtimer_init(&ce->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
745 ce->timer.function = mc_ghost_exhausted;
746}
747
748static void init_local_domain(cpu_entry_t *entry, domain_data_t *dom_data,
749 enum crit_level level)
750{
751 dom_data->heap = NULL;
752 dom_data->crit_entry = &entry->crit_entries[level];
753 init_crit_entry(dom_data->crit_entry, level, dom_data, NULL);
754}
755
756static void init_global_domain(domain_data_t *dom_data, enum crit_level level,
757 struct bheap *heap, struct bheap_node *nodes)
1254{ 758{
1255 int cpu; 759 int cpu;
1256 int i;
1257 cpu_entry_t *entry; 760 cpu_entry_t *entry;
1258 struct watchdog_timer *timer; 761 crit_cpu_entry_t *ce;
1259 762 struct bheap_node *node;
1260 bheap_init(&mc_cpu_heap_c); 763
1261 bheap_init(&mc_cpu_heap_d); 764 dom_data->crit_entry = NULL;
1262 /* initialize CPU state */ 765 dom_data->heap = heap;
1263 for (cpu = 0; cpu < NR_CPUS; cpu++) { 766 bheap_init(heap);
1264 entry = &per_cpu(mc_cpu_entries, cpu); 767
1265 mc_cpus[cpu] = entry; 768 for_each_online_cpu(cpu) {
1266 atomic_set(&entry->will_schedule, 0); 769 entry = cpus[cpu];
1267 entry->cpu = cpu; 770 node = &nodes[cpu];
1268 entry->hn_c = &mc_heap_node_c[cpu]; 771 ce = &entry->crit_entries[level];
1269 entry->hn_d = &mc_heap_node_d[cpu]; 772
1270 bheap_node_init(&entry->hn_c, entry); 773 init_crit_entry(ce, level, dom_data, node);
1271 bheap_node_init(&entry->hn_d, entry); 774
1272 for (i = CRIT_LEVEL_A; i < NUM_CRIT_LEVELS; i++) { 775 bheap_node_init(&ce->node, ce);
1273 timer = ghost_timer(cpu, i); 776 bheap_insert(cpu_lower_prio, heap, node);
1274 hrtimer_init(&timer->timer, CLOCK_MONOTONIC,
1275 HRTIMER_MODE_ABS);
1276 timer->timer.function = watchdog_timeout;
1277 }
1278 }
1279 mc_edf_domain_init(&crit_c, NULL, mc_release_jobs);
1280 mc_edf_domain_init(&crit_d, NULL, mc_release_jobs);
1281 for (i = 0; i < NR_CPUS; i++) {
1282 mc_edf_domain_init(remote_b_queue(i), NULL,
1283 mc_release_jobs);
1284 } 777 }
1285 for (i = 0; i < NR_CPUS; i++) { 778}
1286 mc_edf_domain_init(remote_a_queue(i), NULL, 779
1287 mc_release_jobs); 780static inline void init_edf_domain(domain_t *dom)
781{
782 pd_domain_init(dom, edf_ready_order, NULL,
783 mc_release_jobs, mc_preempt_needed,
784 edf_higher_prio);
785}
786
787static int __init init_mc(void)
788{
789 int cpu;
790 cpu_entry_t *entry;
791 domain_data_t *dom_data;
792
793 raw_spin_lock_init(&global_lock);
794
795 for_each_online_cpu(cpu) {
796 entry = &per_cpu(_mc_cpus, cpu);
797 cpus[cpu] = entry;
798
799 entry->cpu = cpu;
800 entry->scheduled = NULL;
801 entry->linked = NULL;
802
803 /* CRIT_LEVEL_A */
804 dom_data = &per_cpu(_mc_crit_a, cpu);
805 init_local_domain(entry, dom_data, CRIT_LEVEL_A);
806 init_edf_domain(&dom_data->domain);
807 dom_data->domain.name = "LVL-A";
808
809 /* CRIT_LEVEL_B */
810 dom_data = &per_cpu(_mc_crit_b, cpu);
811 init_local_domain(entry, dom_data, CRIT_LEVEL_B);
812 init_edf_domain(&dom_data->domain);
813 dom_data->domain.name = "LVL-B";
1288 } 814 }
815
816 /* CRIT_LEVEL_C */
817 init_global_domain(&_mc_crit_c, CRIT_LEVEL_C,
818 &_mc_heap_c, _mc_nodes_c);
819 init_edf_domain(&_mc_crit_c.domain);
820 _mc_crit_c.domain.name = "LVL-C";
821
822 /* CRIT_LEVEL_D */
823 init_global_domain(&_mc_crit_d, CRIT_LEVEL_D,
824 &_mc_heap_d, _mc_nodes_d);
825 init_edf_domain(&_mc_crit_d.domain);
826 _mc_crit_d.domain.name = "LVL-D";
827
1289 return register_sched_plugin(&mc_plugin); 828 return register_sched_plugin(&mc_plugin);
1290} 829}
1291 830
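To summarize the layout built by init_mc(): each CPU owns a private EDF domain for level A and another for level B, while levels C and D each get one global EDF domain (backed by a CPU heap) shared by all CPUs. A tiny, runnable illustration of that layout, with NR_DEMO_CPUS as a made-up CPU count:

#include <stdio.h>

#define NR_DEMO_CPUS 4

enum crit_level { CRIT_LEVEL_A, CRIT_LEVEL_B, CRIT_LEVEL_C,
                  CRIT_LEVEL_D, NUM_CRIT_LEVELS };

/* Conceptual domain layout: levels A and B are partitioned (one EDF
 * domain per CPU), levels C and D are global (one shared domain each). */
int main(void)
{
        int cpu;
        enum crit_level lvl;

        for (lvl = CRIT_LEVEL_A; lvl < NUM_CRIT_LEVELS; lvl++) {
                if (lvl < CRIT_LEVEL_C) {
                        for (cpu = 0; cpu < NR_DEMO_CPUS; cpu++)
                                printf("LVL-%c: private domain on CPU %d\n",
                                       'A' + lvl, cpu);
                } else {
                        printf("LVL-%c: one global domain shared by %d CPUs\n",
                               'A' + lvl, NR_DEMO_CPUS);
                }
        }
        return 0;
}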
1292
1293module_init(init_mc); 831module_init(init_mc);