diff options
-rw-r--r-- | include/trace/events/litmus.h | 4 | ||||
-rw-r--r-- | include/trace/ftrace.h | 5 | ||||
-rw-r--r-- | litmus/sched_mc.c | 1369 | ||||
-rw-r--r-- | litmus/sched_mc_ce.c | 6 |
4 files changed, 1378 insertions, 6 deletions
diff --git a/include/trace/events/litmus.h b/include/trace/events/litmus.h index 5ca4bef205f0..4ad053eac27d 100644 --- a/include/trace/events/litmus.h +++ b/include/trace/events/litmus.h | |||
@@ -310,7 +310,9 @@ TRACE_EVENT(litmus_server_switch_away, | |||
310 | 310 | ||
311 | TRACE_EVENT(litmus_server_release, | 311 | TRACE_EVENT(litmus_server_release, |
312 | 312 | ||
313 | TP_PROTO(int sid, unsigned int job, unsigned long long release, unsigned long long deadline), | 313 | TP_PROTO(int sid, unsigned int job, |
314 | unsigned long long release, | ||
315 | unsigned long long deadline), | ||
314 | 316 | ||
315 | TP_ARGS(sid, job, release, deadline), | 317 | TP_ARGS(sid, job, release, deadline), |
316 | 318 | ||
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 533c49f48047..4d6f3474e8fa 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h | |||
@@ -17,6 +17,7 @@ | |||
17 | */ | 17 | */ |
18 | 18 | ||
19 | #include <linux/ftrace_event.h> | 19 | #include <linux/ftrace_event.h> |
20 | #include <litmus/litmus.h> | ||
20 | 21 | ||
21 | /* | 22 | /* |
22 | * DECLARE_EVENT_CLASS can be used to add a generic function | 23 | * DECLARE_EVENT_CLASS can be used to add a generic function |
@@ -54,7 +55,7 @@ | |||
54 | #define __string(item, src) __dynamic_array(char, item, -1) | 55 | #define __string(item, src) __dynamic_array(char, item, -1) |
55 | 56 | ||
56 | #undef TP_STRUCT__entry | 57 | #undef TP_STRUCT__entry |
57 | #define TP_STRUCT__entry(args...) args | 58 | #define TP_STRUCT__entry(args...) args __field( unsigned long long, __rt_ts ) |
58 | 59 | ||
59 | #undef DECLARE_EVENT_CLASS | 60 | #undef DECLARE_EVENT_CLASS |
60 | #define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print) \ | 61 | #define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print) \ |
@@ -507,7 +508,7 @@ static inline notrace int ftrace_get_offsets_##call( \ | |||
507 | strcpy(__get_str(dst), src); | 508 | strcpy(__get_str(dst), src); |
508 | 509 | ||
509 | #undef TP_fast_assign | 510 | #undef TP_fast_assign |
510 | #define TP_fast_assign(args...) args | 511 | #define TP_fast_assign(args...) args; __entry->__rt_ts = litmus_clock(); |
511 | 512 | ||
512 | #undef TP_perf_assign | 513 | #undef TP_perf_assign |
513 | #define TP_perf_assign(args...) | 514 | #define TP_perf_assign(args...) |
diff --git a/litmus/sched_mc.c b/litmus/sched_mc.c new file mode 100644 index 000000000000..41f02ee3e6ca --- /dev/null +++ b/litmus/sched_mc.c | |||
@@ -0,0 +1,1369 @@ | |||
1 | /** | ||
2 | * litmus/sched_mc.c | ||
3 | * | ||
4 | * Implementation of the Mixed Criticality scheduling algorithm. | ||
5 | * | ||
6 | * (Per Mollison, Erickson, Anderson, Baruah, Scoredos 2010) | ||
7 | * | ||
8 | * Absolute first: relative time spent doing different parts of release | ||
9 | * and scheduling overhead needs to be measured and graphed. | ||
10 | * | ||
11 | * Domain locks should be more fine-grained. There is no reason to hold the | ||
12 | * ready-queue lock when adding a task to the release-queue. | ||
13 | * | ||
14 | * The levels should be converted to linked-lists so that they are more | ||
15 | * adaptable and need not be identical on all processors. | ||
16 | * | ||
17 | * The interaction between remove_from_all and other concurrent operations | ||
18 | * should be re-examined. If a job_completion and a preemption happen | ||
19 | * simultaneously, a task could be requeued, removed, then requeued again. | ||
20 | * | ||
21 | * Level-C tasks should be able to swap CPUs a-la GSN-EDF. They should also | ||
22 | * try and swap with the last CPU they were on. This could be complicated for | ||
23 | * ghost tasks. | ||
24 | * | ||
25 | * Locking for timer-merging could be infinitely more fine-grained. A second | ||
26 | * hash could select a lock to use based on queue slot. This approach might | ||
27 | * also help with add_release in rt_domains. | ||
28 | * | ||
29 | * It should be possible to reserve a CPU for ftdumping. | ||
30 | * | ||
31 | * The real_deadline business seems sloppy. | ||
32 | * | ||
33 | * The amount of data in the header file should be cut down. The use of the | ||
34 | * header file in general needs to be re-examined. | ||
35 | * | ||
36 | * The plugin needs to be modified so that it doesn't freeze when it is | ||
37 | * deactivated in a VM. | ||
38 | * | ||
39 | * The locking in check_for_preempt is not fine-grained enough. | ||
40 | * | ||
41 | * The size of the structures could be smaller. Debugging info might be | ||
42 | * excessive as things currently stand. | ||
43 | * | ||
44 | * The macro can_requeue has been expanded too much. Anything beyond | ||
45 | * scheduled_on is a hack! | ||
46 | * | ||
47 | * Domain names (rt_domain) are still clumsy. | ||
48 | * | ||
49 | * Should BE be moved into the kernel? This will require benchmarking. | ||
50 | */ | ||
51 | |||
52 | #include <linux/spinlock.h> | ||
53 | #include <linux/percpu.h> | ||
54 | #include <linux/sched.h> | ||
55 | #include <linux/hrtimer.h> | ||
56 | #include <linux/slab.h> | ||
57 | #include <linux/module.h> | ||
58 | #include <linux/poison.h> | ||
59 | #include <linux/pid.h> | ||
60 | |||
61 | #include <litmus/litmus.h> | ||
62 | #include <litmus/trace.h> | ||
63 | #include <litmus/jobs.h> | ||
64 | #include <litmus/sched_plugin.h> | ||
65 | #include <litmus/edf_common.h> | ||
66 | #include <litmus/sched_trace.h> | ||
67 | #include <litmus/domain.h> | ||
68 | #include <litmus/bheap.h> | ||
69 | #include <litmus/event_group.h> | ||
70 | #include <litmus/budget.h> | ||
71 | |||
72 | #include <litmus/sched_mc.h> | ||
73 | #include <litmus/ce_domain.h> | ||
74 | |||
/**
 * struct cpu_entry - State of a CPU for the entire MC system
 * @cpu            CPU id
 * @scheduled      Task that is physically running
 * @will_schedule  NOTE(review): not referenced in this chunk; presumably the
 *                 task the scheduler has committed to run next — confirm
 * @linked         Task that should be running / is logically running
 * @lock           For serialization
 * @crit_entries   Array of CPU state per criticality level
 * @redir          List of redirected work for this CPU.
 * @redir_lock     Lock for @redir.
 * @event_group    Event group for timer merging.
 */
struct cpu_entry {
	int cpu;
	struct task_struct* scheduled;
	struct task_struct* will_schedule;
	struct task_struct* linked;
	raw_spinlock_t lock;
	struct crit_entry crit_entries[NUM_CRIT_LEVELS];
#ifdef CONFIG_PLUGIN_MC_REDIRECT
	struct list_head redir;
	raw_spinlock_t redir_lock;
#endif
#ifdef CONFIG_MERGE_TIMERS
	struct event_group *event_group;
#endif
};
101 | |||
/* Per-CPU scheduler state for the MC plugin */
DEFINE_PER_CPU(struct cpu_entry, cpus);
#ifdef CONFIG_RELEASE_MASTER
/* CPU to which global release work is redirected (see low_prio_arrival) */
static int interrupt_cpu;
#endif
106 | |||
/* Accessors for navigating between domains, CPUs and criticality entries.
 * A domain is "global" iff its domain_data carries a CPU heap.
 */
#define domain_data(dom) (container_of(dom, struct domain_data, domain))
#define is_global(dom) (domain_data(dom)->heap)
#define is_global_task(t) (is_global(get_task_domain(t)))
/* Fixed: second operand previously read "ce->state", breaking macro hygiene
 * for non-trivial @ce expressions; both operands now parenthesize @ce. */
#define can_use(ce) \
	((ce)->state == CS_ACTIVE || (ce)->state == CS_ACTIVATE)
#define can_requeue(t) \
	((t)->rt_param.linked_on == NO_CPU && /* Not linked anywhere */ \
	 !is_queued(t) &&		      /* Not gonna be linked */ \
	 (!is_global_task(t) || (t)->rt_param.scheduled_on == NO_CPU))
#define entry_level(e) \
	(((e)->linked) ? tsk_mc_crit((e)->linked) : NUM_CRIT_LEVELS - 1)
/* Recover the enclosing cpu_entry from a crit_entry by stepping back to
 * slot 0 of the crit_entries array. */
#define crit_cpu(ce) \
	(container_of((void*)((ce) - (ce)->level), struct cpu_entry, crit_entries))
#define get_crit_entry_for(cpu, level) (&per_cpu(cpus, cpu).crit_entries[level])
#define TRACE_ENTRY(e, fmt, args...) \
	STRACE("P%d, linked=" TS " " fmt, e->cpu, TA(e->linked), ##args)
#define TRACE_CRIT_ENTRY(ce, fmt, args...) \
	STRACE("%s P%d, linked=" TS " " fmt, \
	       (ce)->domain->name, crit_cpu(ce)->cpu, TA((ce)->linked), ##args)
126 | |||
127 | static int sid(struct crit_entry *ce) | ||
128 | { | ||
129 | int level = ce->level * num_online_cpus() + crit_cpu(ce)->cpu + 1; | ||
130 | BUG_ON(level >= 0); | ||
131 | return -level; | ||
132 | } | ||
133 | |||
134 | /* | ||
135 | * Sort CPUs within a global domain's heap. | ||
136 | */ | ||
137 | static int cpu_lower_prio(struct bheap_node *a, struct bheap_node *b) | ||
138 | { | ||
139 | struct domain *domain; | ||
140 | struct crit_entry *first, *second; | ||
141 | struct task_struct *first_link, *second_link; | ||
142 | |||
143 | first = a->value; | ||
144 | second = b->value; | ||
145 | first_link = first->linked; | ||
146 | second_link = second->linked; | ||
147 | |||
148 | if (first->state == CS_REMOVED || second->state == CS_REMOVED) { | ||
149 | /* Removed entries go at the back of the heap */ | ||
150 | return first->state != CS_REMOVED && | ||
151 | second->state != CS_REMOVED; | ||
152 | } else if (!first_link || !second_link) { | ||
153 | /* Entry with nothing scheduled is lowest priority */ | ||
154 | return second_link && !first_link; | ||
155 | } else { | ||
156 | /* Sort by deadlines of tasks */ | ||
157 | domain = get_task_domain(first_link); | ||
158 | return domain->higher_prio(second_link, first_link); | ||
159 | } | ||
160 | } | ||
161 | |||
162 | /* | ||
163 | * Return true if the domain has a higher priority ready task. The @curr | ||
164 | * task must belong to the domain. | ||
165 | */ | ||
166 | static int mc_preempt_needed(struct domain *dom, struct task_struct* curr) | ||
167 | { | ||
168 | struct task_struct *next = dom->peek_ready(dom); | ||
169 | if (!next || !curr) { | ||
170 | return next && !curr; | ||
171 | } else { | ||
172 | BUG_ON(tsk_mc_crit(next) != tsk_mc_crit(curr)); | ||
173 | return get_task_domain(next)->higher_prio(next, curr); | ||
174 | } | ||
175 | } | ||
176 | |||
177 | /* | ||
178 | * Update crit entry position in a global heap. Caller must hold | ||
179 | * @ce's domain lock. | ||
180 | */ | ||
181 | static inline void update_crit_position(struct crit_entry *ce) | ||
182 | { | ||
183 | struct bheap *heap; | ||
184 | if (is_global(ce->domain)) { | ||
185 | heap = domain_data(ce->domain)->heap; | ||
186 | BUG_ON(!heap); | ||
187 | BUG_ON(!bheap_node_in_heap(ce->node)); | ||
188 | bheap_delete(cpu_lower_prio, heap, ce->node); | ||
189 | bheap_insert(cpu_lower_prio, heap, ce->node); | ||
190 | } | ||
191 | } | ||
192 | |||
193 | /* | ||
194 | * Update crit entry position in a global heap if it has been marked | ||
195 | * for update. Caller must hold @ce's domain lock. | ||
196 | */ | ||
197 | static void fix_crit_position(struct crit_entry *ce) | ||
198 | { | ||
199 | if (is_global(ce->domain)) { | ||
200 | if (CS_ACTIVATE == ce->state) { | ||
201 | ce->state = CS_ACTIVE; | ||
202 | update_crit_position(ce); | ||
203 | } else if (CS_REMOVE == ce->state) { | ||
204 | ce->state = CS_REMOVED; | ||
205 | update_crit_position(ce); | ||
206 | } | ||
207 | } | ||
208 | } | ||
209 | |||
/*
 * Return the next CPU which should be preempted or NULL if the domain has
 * no preemptable CPUs. Caller must hold the @dom lock.
 */
static inline struct crit_entry* lowest_prio_cpu(struct domain *dom)
{
	struct bheap *heap = domain_data(dom)->heap;
	struct bheap_node* hn;
	struct crit_entry *ce, *res = NULL;
	do {
		hn = bheap_peek(cpu_lower_prio, heap);
		ce = (hn) ? hn->value : NULL;
		if (ce) {
			if (ce->state == CS_ACTIVE)
				/* Usable entry at the top of the heap */
				res = ce;
			else if (ce->state == CS_REMOVED)
				/* Removed entries sort last, so the heap
				 * holds no usable CPUs; stop looking */
				ce = NULL;
			else
				/* Pending state change: apply it (this
				 * re-sorts the heap) and peek again */
				fix_crit_position(ce);
		}
	} while (ce && !res);
	return res;
}
233 | |||
/*
 * Cancel the ghost-budget timer of @ce, if armed. Uses the merged event
 * group when CONFIG_MERGE_TIMERS is set, a per-entry hrtimer otherwise.
 */
static inline void cancel_ghost(struct crit_entry *ce)
{
#ifdef CONFIG_MERGE_TIMERS
	cancel_event(&ce->event);
#else
	/* try_to_cancel: safe even if the callback is currently running */
	hrtimer_try_to_cancel(&ce->timer);
#endif
}
245 | |||
/*
 * Arm the ghost-budget timer of @ce to fire at absolute time @fire (ns).
 * Will merge timers if the option is specified.
 */
static inline void arm_ghost(struct crit_entry *ce, lt_t fire)
{
#ifdef CONFIG_MERGE_TIMERS
	add_event(crit_cpu(ce)->event_group, &ce->event, fire);
#else
	__hrtimer_start_range_ns(&ce->timer,
			ns_to_ktime(fire),
			0 /* delta */,
			HRTIMER_MODE_ABS_PINNED,
			0 /* no wakeup */);
#endif
}
261 | |||
262 | /* | ||
263 | * Time accounting for ghost tasks. | ||
264 | * Must be called before a decision is made involving the task's budget. | ||
265 | */ | ||
266 | static void update_ghost_time(struct task_struct *p) | ||
267 | { | ||
268 | u64 clock = litmus_clock(); | ||
269 | u64 delta = clock - p->se.exec_start; | ||
270 | BUG_ON(!is_ghost(p)); | ||
271 | if (unlikely ((s64)delta < 0)) { | ||
272 | delta = 0; | ||
273 | TRACE_MC_TASK(p, "WARNING: negative time delta\n"); | ||
274 | } | ||
275 | if (tsk_mc_data(p)->mc_job.ghost_budget <= delta) { | ||
276 | TRACE_MC_TASK(p, "Ghost job could have ended\n"); | ||
277 | tsk_mc_data(p)->mc_job.ghost_budget = 0; | ||
278 | p->se.exec_start = clock; | ||
279 | } else { | ||
280 | TRACE_MC_TASK(p, "Ghost job updated, but didn't finish\n"); | ||
281 | tsk_mc_data(p)->mc_job.ghost_budget -= delta; | ||
282 | p->se.exec_start = clock; | ||
283 | } | ||
284 | } | ||
285 | |||
/**
 * link_task_to_crit() - Logically run a task at a criticality level.
 * Unlinks any previously linked task (doing ghost-budget accounting if it
 * was a ghost) and arms the ghost timer for an incoming non-level-A ghost.
 * @task may be NULL to only unlink. Caller must hold @ce's CPU lock.
 */
static void link_task_to_crit(struct crit_entry *ce,
			      struct task_struct *task)
{
	lt_t when_to_fire;

	TRACE_CRIT_ENTRY(ce, "Linking " TS "\n", TA(task));
	BUG_ON(!can_use(ce) && task);
	BUG_ON(task && tsk_rt(task)->linked_on != NO_CPU);
	BUG_ON(task && is_global(ce->domain) &&
	       !bheap_node_in_heap(ce->node));

	/* Unlink last task */
	if (ce->linked) {
		TRACE_MC_TASK(ce->linked, "Unlinking\n");
		ce->linked->rt_param.linked_on = NO_CPU;
		if (is_ghost(ce->linked)) {
			cancel_ghost(ce);
			if (tsk_mc_data(ce->linked)->mc_job.ghost_budget > 0) {
				/* Job isn't finished, so do accounting */
				update_ghost_time(ce->linked);
			}
		}
		sched_trace_server_switch_away(sid(ce), 0, ce->linked->pid);
	}

	/* Actually link task */
	ce->linked = task;
	if (task) {
		task->rt_param.linked_on = crit_cpu(ce)->cpu;
		if (is_ghost(task) && CRIT_LEVEL_A != tsk_mc_crit(task)) {
			/* There is a level-A timer that will force a
			 * preemption, so we don't set this for level-A
			 * tasks. Otherwise reset the budget timer.
			 */
			task->se.exec_start = litmus_clock();
			when_to_fire = task->se.exec_start +
				tsk_mc_data(task)->mc_job.ghost_budget;
			arm_ghost(ce, when_to_fire);

			/* Ghost job: server runs with no task (pid 0) */
			sched_trace_server_switch_to(sid(ce), 0, 0);
		} else {
			sched_trace_server_switch_to(sid(ce), 0, task->pid);
		}
	}
}
335 | |||
336 | static void check_for_preempt(struct domain*); | ||
337 | |||
338 | /** | ||
339 | * job_arrival() - Called when a task re-enters the system. | ||
340 | * Caller must hold no locks. | ||
341 | */ | ||
342 | static void job_arrival(struct task_struct *task) | ||
343 | { | ||
344 | struct domain *dom = get_task_domain(task); | ||
345 | |||
346 | TRACE_MC_TASK(task, "Job arriving\n"); | ||
347 | BUG_ON(!task); | ||
348 | |||
349 | raw_spin_lock(dom->lock); | ||
350 | if (can_requeue(task)) { | ||
351 | BUG_ON(task->rt_param.linked_on != NO_CPU); | ||
352 | dom->requeue(dom, task); | ||
353 | check_for_preempt(dom); | ||
354 | } else { | ||
355 | /* If a global task is scheduled on one cpu, it CANNOT | ||
356 | * be requeued into a global domain. Another cpu might | ||
357 | * dequeue the global task before it is descheduled, | ||
358 | * causing the system to crash when the task is scheduled | ||
359 | * in two places simultaneously. | ||
360 | */ | ||
361 | TRACE_MC_TASK(task, "Delayed arrival of scheduled task\n"); | ||
362 | } | ||
363 | raw_spin_unlock(dom->lock); | ||
364 | } | ||
365 | |||
/**
 * low_prio_arrival() - If CONFIG_PLUGIN_MC_REDIRECT is enabled, will
 * redirect a lower priority job_arrival work to the interrupt_cpu.
 * Otherwise (or for partitioned tasks) performs the arrival locally.
 */
static void low_prio_arrival(struct task_struct *task)
{
	/* NOTE(review): @entry is unused when CONFIG_PLUGIN_MC_REDIRECT is
	 * off and will draw an unused-variable warning in that config. */
	struct cpu_entry *entry;

	/* Race conditions! */
	if (!can_requeue(task)) return;

#ifdef CONFIG_PLUGIN_MC_REDIRECT
	if (!is_global_task(task))
		goto arrive;
	if (smp_processor_id() != interrupt_cpu) {
		/* Queue the arrival on this CPU's redirect list and poke
		 * the designated interrupt CPU to process it */
		entry = &__get_cpu_var(cpus);
		raw_spin_lock(&entry->redir_lock);
		TRACE_MC_TASK(task, "Adding to redirect queue\n");
		list_add(&tsk_rt(task)->list, &entry->redir);
		raw_spin_unlock(&entry->redir_lock);
		litmus_reschedule(interrupt_cpu);
	} else
#endif
	{
		/* Local arrival path (also the only path when redirection
		 * is compiled out) */
arrive:
		job_arrival(task);
	}
}
394 | |||
#ifdef CONFIG_PLUGIN_MC_REDIRECT
/**
 * fix_global_levels() - Execute redirected job arrivals on this cpu.
 * Drains every CPU's redirect list, running job_arrival() for each entry.
 */
static void fix_global_levels(void)
{
	struct list_head *cursor, *next;
	struct cpu_entry *entry;
	struct task_struct *task;
	int cpu;

	STRACE("Fixing global levels\n");
	for_each_online_cpu(cpu) {
		entry = &per_cpu(cpus, cpu);
		raw_spin_lock(&entry->redir_lock);
		list_for_each_safe(cursor, next, &entry->redir) {
			task = list_entry(cursor, struct task_struct,
					  rt_param.list);
			BUG_ON(!task);
			TRACE_MC_TASK(task, "Dequeued redirected job\n");
			list_del_init(cursor);
			job_arrival(task);
		}
		raw_spin_unlock(&entry->redir_lock);
	}
}
#endif
421 | |||
/**
 * link_task_to_cpu() - Logically run a task on a CPU.
 * The task must first have been linked to one of the CPU's crit_entries.
 * @task may be NULL to idle the CPU. Caller must hold the entry lock.
 */
static void link_task_to_cpu(struct cpu_entry *entry, struct task_struct *task)
{
	/* Capture the outgoing level BEFORE relinking: entry_level()
	 * changes once entry->linked is overwritten below. */
	int i = entry_level(entry);
	struct crit_entry *ce;
	TRACE_MC_TASK(task, "Linking to P%d\n", entry->cpu);
	BUG_ON(task && tsk_rt(task)->linked_on != entry->cpu);
	BUG_ON(task && is_ghost(task));

	/* Server bookkeeping: switch away from the outgoing task */
	if (entry->linked) {
		sched_trace_server_switch_away(-entry->linked->pid,
				get_server_job(entry->linked),
				entry->linked->pid);
	}

	if (task){
		set_rt_flags(task, RT_F_RUNNING);
		sched_trace_server_switch_to(-task->pid,
				get_server_job(task),
				task->pid);
	}
	entry->linked = task;

	/* Higher criticality crit entries are now usable: walk from the
	 * old level up to and including the new level (entry_level() now
	 * reflects the freshly linked task). */
	for (; i < entry_level(entry) + 1; i++) {
		ce = &entry->crit_entries[i];
		if (!can_use(ce)) {
			ce->state = CS_ACTIVATE;
		}
	}
}
457 | |||
/**
 * preempt() - Preempt a logically running task with a higher priority one.
 * @dom Domain from which to draw higher priority task
 * @ce CPU criticality level to preempt
 *
 * Caller must hold the lock for @dom and @ce's CPU lock.
 */
static void preempt(struct domain *dom, struct crit_entry *ce)
{
	struct task_struct *task = dom->take_ready(dom);
	struct cpu_entry *entry = crit_cpu(ce);
	struct task_struct *old = ce->linked;

	BUG_ON(!task);
	TRACE_CRIT_ENTRY(ce, "Preempted by " TS "\n", TA(task));

	/* Per-domain preemption */
	link_task_to_crit(ce, task);
	if (old && can_requeue(old)) {
		dom->requeue(dom, old);
	}
	update_crit_position(ce);

	/* Preempt actual execution if this is a running task */
	if (!is_ghost(task)) {
		link_task_to_cpu(entry, task);
		preempt_if_preemptable(entry->scheduled, entry->cpu);
	} else if (old && old == entry->linked) {
		/* Preempted a running task with a ghost job. Null needs to be
		 * running.
		 */
		link_task_to_cpu(entry, NULL);
		preempt_if_preemptable(entry->scheduled, entry->cpu);
	}
}
493 | |||
/**
 * update_crit_levels() - Update criticality entries for the new cpu state.
 * This should be called after a new task has been linked to @entry.
 * The caller must hold the @entry->lock, but this method will release it.
 * Lower-criticality tasks are unlinked here and re-admitted (lock-free)
 * afterwards via low_prio_arrival().
 */
static void update_crit_levels(struct cpu_entry *entry)
{
	int i, global_preempted;
	struct crit_entry *ce;
	struct task_struct *readmit[NUM_CRIT_LEVELS];
	enum crit_level level = entry_level(entry);

	/* Remove lower priority tasks from the entry */
	for (i = level + 1; i < NUM_CRIT_LEVELS; i++) {
		ce = &entry->crit_entries[i];

		global_preempted = ce->linked &&
			/* This task is running on a cpu */
			ce->linked->rt_param.scheduled_on == entry->cpu &&
			/* But it was preempted */
			ce->linked != entry->linked &&
			/* And it is an eligible global task */
			!is_ghost(ce->linked) && is_global(ce->domain);

		/* Do not readmit global tasks which are preempted! These can't
		 * ever be re-admitted until they are descheduled for reasons
		 * explained in job_arrival.
		 */
		readmit[i] = (!global_preempted) ? ce->linked : NULL;

		ce->state = CS_REMOVE;
		if (ce->linked)
			link_task_to_crit(ce, NULL);
	}
	/* Need to unlock so we can access domains */
	raw_spin_unlock(&entry->lock);

	/* Re-admit tasks to the system */
	for (i = level + 1; i < NUM_CRIT_LEVELS; i++) {
		ce = &entry->crit_entries[i];
		if (readmit[i]) {
			low_prio_arrival(readmit[i]);
		}
	}
}
539 | |||
540 | /** | ||
541 | * check_for_preempt() - Causes a preemption if higher-priority tasks are ready. | ||
542 | * Caller must hold domain lock. | ||
543 | * Makes gigantic nasty assumption that there is 1 global criticality level, | ||
544 | * and it is the last one in each list, so it doesn't call update_crit.. | ||
545 | */ | ||
546 | static void check_for_preempt(struct domain *dom) | ||
547 | { | ||
548 | int recheck = 1; | ||
549 | struct cpu_entry *entry; | ||
550 | struct crit_entry *ce; | ||
551 | |||
552 | if (is_global(dom)) { | ||
553 | /* Loop until we find a non-preemptable CPU */ | ||
554 | while ((ce = lowest_prio_cpu(dom)) && recheck) { | ||
555 | entry = crit_cpu(ce); | ||
556 | recheck = 1; | ||
557 | |||
558 | /* Cache next task */ | ||
559 | dom->peek_ready(dom); | ||
560 | |||
561 | raw_spin_lock(&entry->lock); | ||
562 | if (!can_use(ce)) | ||
563 | /* CPU disabled while locking! */ | ||
564 | fix_crit_position(ce); | ||
565 | else if (dom->preempt_needed(dom, ce->linked)) | ||
566 | /* Success! Check for more preemptions */ | ||
567 | preempt(dom, ce); | ||
568 | else { | ||
569 | /* Failure! */ | ||
570 | recheck = 0; | ||
571 | TRACE_CRIT_ENTRY(ce, "Stopped global check\n"); | ||
572 | } | ||
573 | raw_spin_unlock(&entry->lock); | ||
574 | } | ||
575 | } else /* Partitioned */ { | ||
576 | ce = domain_data(dom)->crit_entry; | ||
577 | entry = crit_cpu(ce); | ||
578 | |||
579 | /* Cache next task */ | ||
580 | dom->peek_ready(dom); | ||
581 | |||
582 | raw_spin_lock(&entry->lock); | ||
583 | if (can_use(ce) && dom->preempt_needed(dom, ce->linked)) { | ||
584 | preempt(dom, ce); | ||
585 | update_crit_levels(entry); | ||
586 | } else { | ||
587 | raw_spin_unlock(&entry->lock); | ||
588 | } | ||
589 | } | ||
590 | } | ||
591 | |||
592 | /** | ||
593 | * remove_from_all() - Logically remove a task from all structures. | ||
594 | * Caller must hold no locks. | ||
595 | */ | ||
596 | static void remove_from_all(struct task_struct* task) | ||
597 | { | ||
598 | int update = 0; | ||
599 | struct cpu_entry *entry; | ||
600 | struct crit_entry *ce; | ||
601 | struct domain *dom = get_task_domain(task); | ||
602 | |||
603 | TRACE_MC_TASK(task, "Removing from everything\n"); | ||
604 | BUG_ON(!task); | ||
605 | |||
606 | raw_spin_lock(dom->lock); | ||
607 | |||
608 | /* Remove the task from any CPU state */ | ||
609 | if (task->rt_param.linked_on != NO_CPU) { | ||
610 | entry = &per_cpu(cpus, task->rt_param.linked_on); | ||
611 | raw_spin_lock(&entry->lock); | ||
612 | |||
613 | /* Unlink only if task is still linked post lock */ | ||
614 | ce = &entry->crit_entries[tsk_mc_crit(task)]; | ||
615 | if (task->rt_param.linked_on != NO_CPU) { | ||
616 | BUG_ON(ce->linked != task); | ||
617 | link_task_to_crit(ce, NULL); | ||
618 | update_crit_position(ce); | ||
619 | if (!is_ghost(task) && entry->linked == task) { | ||
620 | update = 1; | ||
621 | link_task_to_cpu(entry, NULL); | ||
622 | } | ||
623 | } else { | ||
624 | TRACE_MC_TASK(task, "Unlinked before we got lock!\n"); | ||
625 | } | ||
626 | if (update) | ||
627 | update_crit_levels(entry); | ||
628 | else | ||
629 | raw_spin_unlock(&entry->lock); | ||
630 | } else { | ||
631 | TRACE_MC_TASK(task, "Not linked to anything\n"); | ||
632 | } | ||
633 | |||
634 | /* Ensure the task isn't returned by its domain */ | ||
635 | dom->remove(dom, task); | ||
636 | |||
637 | raw_spin_unlock(dom->lock); | ||
638 | } | ||
639 | |||
/**
 * job_completion() - Update task state and re-enter it into the system.
 * Converts tasks which have completed their execution early into ghost jobs.
 * @forced: nonzero when the completion was imposed (budget exhaustion)
 *          rather than the task voluntarily finishing.
 * Caller must hold no locks.
 */
static void job_completion(struct task_struct *task, int forced)
{
	int behind;
	TRACE_MC_TASK(task, "Completed\n");

	/* Logically stop the task execution */
	set_rt_flags(task, RT_F_SLEEP);
	remove_from_all(task);

	/* Level-A tasks cannot ever get behind */
	behind = tsk_mc_crit(task) != CRIT_LEVEL_A && behind_server(task);

	if (!forced && !is_ghost(task)) {
		/* Task voluntarily ceased execution. Move on to next period */
		task_release(task);
		sched_trace_task_completion(task, forced);

		/* Convert to ghost job */
		tsk_mc_data(task)->mc_job.ghost_budget = budget_remaining(task);
		tsk_mc_data(task)->mc_job.is_ghost = 1;
	}

	/* If the task has no ghost budget, convert back from ghost.
	 * If the task is behind, undo ghost conversion so that it
	 * can catch up.
	 */
	if (behind || tsk_mc_data(task)->mc_job.ghost_budget == 0) {
		TRACE_MC_TASK(task, "Not a ghost task\n");
		tsk_mc_data(task)->mc_job.is_ghost = 0;
		tsk_mc_data(task)->mc_job.ghost_budget = 0;
	}

	/* If server has run out of budget, wait until next release */
	if (budget_exhausted(task)) {
		sched_trace_server_completion(-task->pid,
					      get_server_job(task));
		server_release(task);
	}

	/* Requeue non-blocking tasks */
	if (is_running(task))
		job_arrival(task);
}
688 | |||
689 | /** | ||
690 | * mc_ghost_exhausted() - Complete logically running ghost task. | ||
691 | */ | ||
692 | #ifdef CONFIG_MERGE_TIMERS | ||
693 | static void mc_ghost_exhausted(struct rt_event *e) | ||
694 | { | ||
695 | struct crit_entry *ce = container_of(e, struct crit_entry, event); | ||
696 | #else | ||
697 | static enum hrtimer_restart mc_ghost_exhausted(struct hrtimer *timer) | ||
698 | { | ||
699 | struct crit_entry *ce = container_of(timer, struct crit_entry, timer); | ||
700 | #endif | ||
701 | |||
702 | unsigned long flags; | ||
703 | struct task_struct *tmp = NULL; | ||
704 | |||
705 | local_irq_save(flags); | ||
706 | TRACE("Ghost exhausted\n"); | ||
707 | TRACE_CRIT_ENTRY(ce, "Firing here\n"); | ||
708 | |||
709 | /* Due to race conditions, we cannot just set the linked | ||
710 | * task's budget to 0 as it may no longer be the task | ||
711 | * for which this timer was armed. Instead, update the running | ||
712 | * task time and see if this causes exhaustion. | ||
713 | */ | ||
714 | raw_spin_lock(&crit_cpu(ce)->lock); | ||
715 | if (ce->linked && is_ghost(ce->linked)) { | ||
716 | update_ghost_time(ce->linked); | ||
717 | if (tsk_mc_data(ce->linked)->mc_job.ghost_budget == 0) { | ||
718 | tmp = ce->linked; | ||
719 | } | ||
720 | } | ||
721 | raw_spin_unlock(&crit_cpu(ce)->lock); | ||
722 | |||
723 | if (tmp) | ||
724 | job_completion(tmp, 0); | ||
725 | |||
726 | local_irq_restore(flags); | ||
727 | #ifndef CONFIG_MERGE_TIMERS | ||
728 | return HRTIMER_NORESTART; | ||
729 | #endif | ||
730 | } | ||
731 | |||
/*
 * The MC-CE common timer callback code for merged and non-merged timers.
 * Completes the current level-A ghost job (if any) or checks for a
 * preemption. Returns the next time the timer should fire.
 */
static lt_t __ce_timer_function(struct ce_dom_data *ce_data)
{
	struct crit_entry *ce = get_crit_entry_for(ce_data->cpu, CRIT_LEVEL_A);
	struct domain *dom = ce->domain;
	struct task_struct *old_link = NULL;
	lt_t next_timer_abs;

	TRACE("MC level-A timer callback for CPU %d\n", ce_data->cpu);

	raw_spin_lock(dom->lock);

	raw_spin_lock(&crit_cpu(ce)->lock);
	/* Only complete the job if it is still the one this timer was
	 * logically armed for and it is running as a ghost */
	if (ce->linked &&
	    ce->linked == ce_data->should_schedule &&
	    is_ghost(ce->linked))
	{
		old_link = ce->linked;
		tsk_mc_data(ce->linked)->mc_job.ghost_budget = 0;
		link_task_to_crit(ce, NULL);
	}
	raw_spin_unlock(&crit_cpu(ce)->lock);

	next_timer_abs = mc_ce_timer_callback_common(dom);

	/* Job completion will check for preemptions by means of calling job
	 * arrival if the task is not blocked */
	if (NULL != old_link) {
		STRACE("old_link " TS " so will call job completion\n", TA(old_link));
		raw_spin_unlock(dom->lock);
		job_completion(old_link, 0);
	} else {
		STRACE("old_link was null, so will call check for preempt\n");
		raw_spin_unlock(dom->lock);
		check_for_preempt(dom);
	}
	return next_timer_abs;
}
773 | |||
#ifdef CONFIG_MERGE_TIMERS
/* Merged-timer wrapper: run the common callback and re-arm via the
 * per-CPU event group. */
static void ce_timer_function(struct rt_event *e)
{
	struct ce_dom_data *ce_data =
		container_of(e, struct ce_dom_data, event);
	unsigned long flags;
	lt_t next_timer_abs;

	TS_LVLA_RELEASE_START;

	local_irq_save(flags);
	next_timer_abs = __ce_timer_function(ce_data);
	add_event(per_cpu(cpus, ce_data->cpu).event_group, e, next_timer_abs);
	local_irq_restore(flags);

	TS_LVLA_RELEASE_END;
}
791 | #else /* else to CONFIG_MERGE_TIMERS */ | ||
792 | static enum hrtimer_restart ce_timer_function(struct hrtimer *timer) | ||
793 | { | ||
794 | struct ce_dom_data *ce_data = | ||
795 | container_of(timer, struct ce_dom_data, timer); | ||
796 | unsigned long flags; | ||
797 | lt_t next_timer_abs; | ||
798 | |||
799 | TS_LVLA_RELEASE_START; | ||
800 | |||
801 | local_irq_save(flags); | ||
802 | next_timer_abs = __ce_timer_function(ce_data); | ||
803 | hrtimer_set_expires(timer, ns_to_ktime(next_timer_abs)); | ||
804 | local_irq_restore(flags); | ||
805 | |||
806 | TS_LVLA_RELEASE_END; | ||
807 | |||
808 | return HRTIMER_RESTART; | ||
809 | } | ||
810 | #endif /* CONFIG_MERGE_TIMERS */ | ||
811 | |||
812 | |||
/**
 * mc_release_jobs() - Add heap of tasks to the system, check for preemptions.
 * All tasks in @tasks share one domain (that of the heap's head).
 */
static void mc_release_jobs(rt_domain_t* rt, struct bheap* tasks)
{
	unsigned long flags;
	/* NOTE(review): bheap_peek() result is dereferenced unchecked —
	 * assumes the release heap is never empty here; verify callers. */
	struct task_struct *first = bheap_peek(rt->order, tasks)->value;
	struct domain *dom = get_task_domain(first);

	raw_spin_lock_irqsave(dom->lock, flags);
	TRACE(TS "Jobs released\n", TA(first));
	__merge_ready(rt, tasks);
	check_for_preempt(dom);
	raw_spin_unlock_irqrestore(dom->lock, flags);
}
828 | |||
/**
 * mc_task_new() - Setup new mixed-criticality task.
 * Assumes that there are no partitioned domains after level B.
 */
static void mc_task_new(struct task_struct *t, int on_rq, int running)
{
	unsigned long flags;
	struct cpu_entry* entry;
	enum crit_level level = tsk_mc_crit(t);
	/* NOTE(review): container name is hard-coded to "rtspin" rather than
	 * taken from t->comm — presumably a tracing shortcut; confirm. */
	char name[TASK_COMM_LEN];
	strcpy(name, "rtspin");

	local_irq_save(flags);
	TRACE("New mixed criticality task %d\n", t->pid);

	/* Assign domain: levels below C are partitioned (use the task's
	 * partition); level C and above use the entry of the task's CPU. */
	if (level < CRIT_LEVEL_C)
		entry = &per_cpu(cpus, get_partition(t));
	else
		entry = &per_cpu(cpus, task_cpu(t));
	t->rt_param._domain = entry->crit_entries[level].domain;

	/* Trace the task as a server (-pid) inside its own container (pid). */
	sched_trace_container_param(t->pid, name);
	sched_trace_server_param(-t->pid, t->pid,
			get_exec_cost(t), get_rt_period(t));

	/* Setup job params */
	release_at(t, litmus_clock());
	tsk_mc_data(t)->mc_job.ghost_budget = 0;
	tsk_mc_data(t)->mc_job.is_ghost = 0;
	if (running) {
		BUG_ON(entry->scheduled);
		entry->scheduled = t;
		tsk_rt(t)->scheduled_on = entry->cpu;
	} else {
		t->rt_param.scheduled_on = NO_CPU;
	}
	t->rt_param.linked_on = NO_CPU;


	job_arrival(t);

	local_irq_restore(flags);
}
873 | |||
/**
 * mc_task_wake_up() - Add task back into its domain, check for preemptions.
 * (Header previously mis-named this mc_task_new().)
 */
static void mc_task_wake_up(struct task_struct *task)
{
	unsigned long flags;
	lt_t now = litmus_clock();
	local_irq_save(flags);

	TRACE(TS " wakes up\n", TA(task));
	if (is_tardy(task, now)) {
		/* Task missed its last release */
		release_at(task, now);
		sched_trace_task_release(task);
	}
	/* Only non-ghost jobs rejoin the ready state on wake-up. */
	if (!is_ghost(task))
		job_arrival(task);

	local_irq_restore(flags);
}
894 | |||
895 | /** | ||
896 | * mc_task_block() - Remove task from state to prevent it being run anywhere. | ||
897 | */ | ||
898 | static void mc_task_block(struct task_struct *task) | ||
899 | { | ||
900 | unsigned long flags; | ||
901 | local_irq_save(flags); | ||
902 | TRACE(TS " blocks\n", TA(task)); | ||
903 | remove_from_all(task); | ||
904 | local_irq_restore(flags); | ||
905 | } | ||
906 | |||
907 | /** | ||
908 | * mc_task_exit() - Remove task from the system. | ||
909 | */ | ||
910 | static void mc_task_exit(struct task_struct *task) | ||
911 | { | ||
912 | unsigned long flags; | ||
913 | local_irq_save(flags); | ||
914 | BUG_ON(!is_realtime(task)); | ||
915 | TRACE(TS " RIP\n", TA(task)); | ||
916 | |||
917 | remove_from_all(task); | ||
918 | if (tsk_rt(task)->scheduled_on != NO_CPU) { | ||
919 | per_cpu(cpus, tsk_rt(task)->scheduled_on).scheduled = NULL; | ||
920 | tsk_rt(task)->scheduled_on = NO_CPU; | ||
921 | } | ||
922 | |||
923 | if (CRIT_LEVEL_A == tsk_mc_crit(task)) | ||
924 | mc_ce_task_exit_common(task); | ||
925 | |||
926 | local_irq_restore(flags); | ||
927 | } | ||
928 | |||
929 | /** | ||
930 | * mc_admit_task() - Return true if the task is valid. | ||
931 | * Assumes there are no partitioned levels after level B. | ||
932 | */ | ||
933 | static long mc_admit_task(struct task_struct* task) | ||
934 | { | ||
935 | const enum crit_level crit = tsk_mc_crit(task); | ||
936 | long ret; | ||
937 | if (!tsk_mc_data(task)) { | ||
938 | printk(KERN_WARNING "Tried to admit task with no criticality " | ||
939 | "level\n"); | ||
940 | ret = -EINVAL; | ||
941 | goto out; | ||
942 | } | ||
943 | if (crit < CRIT_LEVEL_C && get_partition(task) == NO_CPU) { | ||
944 | printk(KERN_WARNING "Tried to admit partitioned task with no " | ||
945 | "partition\n"); | ||
946 | ret = -EINVAL; | ||
947 | goto out; | ||
948 | } | ||
949 | if (crit == CRIT_LEVEL_A) { | ||
950 | ret = mc_ce_admit_task_common(task); | ||
951 | if (ret) | ||
952 | goto out; | ||
953 | } | ||
954 | printk(KERN_INFO "Admitted task with criticality level %d\n", | ||
955 | tsk_mc_crit(task)); | ||
956 | ret = 0; | ||
957 | out: | ||
958 | return ret; | ||
959 | } | ||
960 | |||
/**
 * mc_schedule() - Return next task which should be scheduled.
 *
 * Runs with interrupts disabled. The CPU entry lock is repeatedly dropped
 * and re-taken because a domain lock must never be acquired while an entry
 * lock is held (see the lock-swap comment below).
 */
static struct task_struct* mc_schedule(struct task_struct* prev)
{
	unsigned long flags;
	struct domain *dom;
	struct crit_entry *ce;
	struct cpu_entry* entry = &__get_cpu_var(cpus);
	int i, out_of_time, sleep, preempt, exists, blocks, global, lower;
	struct task_struct *dtask = NULL, *ready_task = NULL, *next = NULL;

	local_irq_save(flags);

	/* Litmus gave up because it couldn't access the stack of the CPU
	 * on which will_schedule was migrating from. Requeue it.
	 * This really only happens in VMs.
	 */
	if (entry->will_schedule && entry->will_schedule != prev) {
		entry->will_schedule->rt_param.scheduled_on = NO_CPU;
		low_prio_arrival(entry->will_schedule);
	}

	raw_spin_lock(&entry->lock);

	/* Sanity checking */
	BUG_ON(entry->scheduled && entry->scheduled != prev);
	BUG_ON(entry->scheduled && !is_realtime(prev));
	BUG_ON(is_realtime(prev) && !entry->scheduled);

	/* Determine state:
	 *   blocks      - scheduled task is no longer runnable
	 *   out_of_time - scheduled task exhausted an enforced budget
	 *   sleep       - scheduled task flagged its job complete (RT_F_SLEEP)
	 *   global      - scheduled task belongs to a global domain
	 *   preempt     - linked task differs from the scheduled one
	 *   lower       - scheduled task is of lower criticality (numerically
	 *                 higher level) than the task linked to replace it
	 */
	exists = entry->scheduled != NULL;
	blocks = exists && !is_running(entry->scheduled);
	out_of_time = exists && budget_enforced(entry->scheduled) &&
		budget_exhausted(entry->scheduled);
	sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP;
	global = exists && is_global_task(entry->scheduled);
	preempt = entry->scheduled != entry->linked;
	lower = exists && preempt && entry->linked &&
		tsk_mc_crit(entry->scheduled) > tsk_mc_crit(entry->linked);

	TRACE(TS " blocks:%d out_of_time:%d sleep:%d preempt:%d\n",
	      TA(prev), blocks, out_of_time, sleep, preempt);

	if (exists)
		prev->rt_param.scheduled_on = NO_CPU;

	raw_spin_unlock(&entry->lock);


#ifdef CONFIG_PLUGIN_MC_REDIRECT
	/* NOTE(review): presumably drains work redirected to the interrupt
	 * CPU from other CPUs — confirm against fix_global_levels(). */
	if (smp_processor_id() == interrupt_cpu)
		fix_global_levels();
#endif

	/* If a task blocks we have no choice but to reschedule */
	if (blocks)
		remove_from_all(entry->scheduled);
	/* Any task which exhausts its budget or sleeps waiting for its next
	 * period completes unless its execution has been forcibly stopped.
	 */
	if ((out_of_time || sleep) && !blocks)/* && !preempt)*/
		job_completion(entry->scheduled, !sleep);
	/* Global scheduled tasks must wait for a deschedule before they
	 * can rejoin the global state. Rejoin them here.
	 */
	else if (global && preempt && !blocks) {
		if (lower)
			low_prio_arrival(entry->scheduled);
		else
			job_arrival(entry->scheduled);
	}

	/* Pick next task if none is linked: walk the criticality levels from
	 * highest (A) down until something gets linked to this CPU. */
	raw_spin_lock(&entry->lock);
	for (i = 0; i < NUM_CRIT_LEVELS && !entry->linked; i++) {
		ce = &entry->crit_entries[i];
		dom = ce->domain;

		/* Swap locks. We cannot acquire a domain lock while
		 * holding an entry lock or deadlocks will happen.
		 */
		raw_spin_unlock(&entry->lock);
		raw_spin_lock(dom->lock);

		/* Do domain stuff before grabbing CPU locks */
		dtask = dom->peek_ready(dom);
		fix_crit_position(ce);

		raw_spin_lock(&entry->lock);

		/* Link the head of this level's ready queue if both the CPU
		 * and this crit entry are still free and the entry is usable. */
		if (!entry->linked && !ce->linked && dtask && can_use(ce)) {
			dom->take_ready(dom);
			link_task_to_crit(ce, dtask);
			update_crit_position(ce);
			/* Ghost jobs occupy the crit entry but not the CPU. */
			ready_task = (is_ghost(dtask)) ? NULL : dtask;

			/* Task found! */
			if (ready_task) {
				link_task_to_cpu(entry, ready_task);
				raw_spin_unlock(dom->lock);
				/* NOTE(review): update_crit_levels() appears
				 * to release the entry lock — it is re-taken
				 * just below; confirm. */
				update_crit_levels(entry);
				raw_spin_lock(&entry->lock);
				continue;
			}
		}
		raw_spin_unlock(dom->lock);
	}

	/* Schedule next task */
	next = entry->linked;
	if (entry->linked)
		entry->linked->rt_param.scheduled_on = entry->cpu;
	entry->will_schedule = entry->linked;
	sched_state_task_picked();

	raw_spin_unlock(&entry->lock);
	local_irq_restore(flags);
	if (next) {
		TRACE_MC_TASK(next, "Picked this task\n");
	} else if (exists && !next)
		TRACE_ENTRY(entry, "Becomes idle at %llu\n", litmus_clock());
	return next;
}
1085 | |||
1086 | void mc_finish_switch(struct task_struct *prev) | ||
1087 | { | ||
1088 | struct cpu_entry* entry = &__get_cpu_var(cpus); | ||
1089 | entry->scheduled = is_realtime(current) ? current : NULL; | ||
1090 | TRACE_TASK(prev, "Switched away from to " TS "\n", | ||
1091 | TA(entry->scheduled)); | ||
1092 | } | ||
1093 | |||
1094 | /* | ||
1095 | * This is the plugin's release at function, called by the release task-set | ||
1096 | * system call. Other places in the file use the generic LITMUS release_at(), | ||
1097 | * which is not this. | ||
1098 | */ | ||
1099 | void mc_release_at(struct task_struct *ts, lt_t start) | ||
1100 | { | ||
1101 | /* hack so that we can have CE timers start at the right time */ | ||
1102 | if (CRIT_LEVEL_A == tsk_mc_crit(ts)) | ||
1103 | mc_ce_release_at_common(ts, start); | ||
1104 | else | ||
1105 | release_at(ts, start); | ||
1106 | } | ||
1107 | |||
/* Plugin deactivation: delegate teardown to the MC-CE common path. */
long mc_deactivate_plugin(void)
{
	long ret = mc_ce_deactivate_plugin_common();

	return ret;
}
1112 | |||
1113 | /* ************************************************************************** | ||
1114 | * Initialization | ||
1115 | * ************************************************************************** */ | ||
1116 | |||
/* Initialize values here so that they are allocated with the module
 * and destroyed when the module is unloaded.
 */

/* LVL-A: one partitioned cyclic-executive domain per CPU */
DEFINE_PER_CPU(struct domain_data, _mc_crit_a);
DEFINE_PER_CPU(raw_spinlock_t, _mc_crit_a_lock);
DEFINE_PER_CPU(struct ce_dom_data, _mc_crit_a_ce_data);
/* LVL-B: one partitioned EDF domain per CPU */
DEFINE_PER_CPU(struct domain_data, _mc_crit_b);
DEFINE_PER_CPU(rt_domain_t, _mc_crit_b_rt);
/* LVL-C: a single global EDF domain shared by all CPUs */
static struct domain_data _mc_crit_c;
static rt_domain_t _mc_crit_c_rt;
struct bheap _mc_heap_c;
struct bheap_node _mc_nodes_c[NR_CPUS];
1133 | |||
/*
 * mc_activate_plugin() - Plugin activation hook: read the release-master
 * CPU, hand every online CPU's level-A domain to the MC-CE layer, and run
 * the common CE activation. Returns 0 on success or a negative error.
 */
static long mc_activate_plugin(void)
{
	struct domain_data *dom_data;
	struct domain *dom;
	struct domain_data *our_domains[NR_CPUS];
	int cpu, n = 0;
	long ret;

#ifdef CONFIG_RELEASE_MASTER
	interrupt_cpu = atomic_read(&release_master_cpu);
#if defined(CONFIG_PLUGIN_MC_REDIRECT) || defined(CONFIG_PLUGIN_MC_RELEASE_MASTER)
	/* Redirection / release-master configs require a dedicated CPU. */
	if (NO_CPU == interrupt_cpu) {
		printk(KERN_ERR "LITMUS-MC: need a release master\n");
		ret = -EINVAL;
		goto out;
	}
#endif
#endif

	/* Collect each online CPU's level-A domain for the MC-CE layer. */
	for_each_online_cpu(cpu) {
		BUG_ON(NR_CPUS <= n);
		dom = per_cpu(cpus, cpu).crit_entries[CRIT_LEVEL_A].domain;
		dom_data = domain_data(dom);
		/* NOTE(review): indexed by cpu while n counts entries; the
		 * two agree only if online CPU ids are dense from 0 — confirm
		 * against mc_ce_set_domains(). */
		our_domains[cpu] = dom_data;
#if defined(CONFIG_MERGE_TIMERS) && defined(CONFIG_PLUGIN_MC_RELEASE_MASTER)
		per_cpu(cpus, cpu).event_group =
			get_event_group_for(interrupt_cpu);
#elif defined(CONFIG_MERGE_TIMERS) && !defined(CONFIG_PLUGIN_MC_RELEASE_MASTER)
		per_cpu(cpus, cpu).event_group = get_event_group_for(cpu);
#endif
		n++;
	}
	ret = mc_ce_set_domains(n, our_domains);
	if (ret)
		goto out;
	ret = mc_ce_activate_plugin_common();
out:
	return ret;
}
1173 | |||
1174 | |||
1175 | static void mc_release_ts(lt_t time) | ||
1176 | { | ||
1177 | int i, cpu, base_id = 0, cont_id = -1; | ||
1178 | char name[TASK_COMM_LEN]; | ||
1179 | enum crit_level level; | ||
1180 | struct cpu_entry *entry; | ||
1181 | struct crit_entry *ce; | ||
1182 | |||
1183 | level = CRIT_LEVEL_A; | ||
1184 | strcpy(name, "LVL-A"); | ||
1185 | for_each_online_cpu(cpu) { | ||
1186 | entry = &per_cpu(cpus, cpu); | ||
1187 | trace_litmus_container_param(++cont_id, &name); | ||
1188 | ce = &entry->crit_entries[level]; | ||
1189 | sched_trace_server_param(sid(ce), cont_id, 0, 0); | ||
1190 | } | ||
1191 | |||
1192 | level = CRIT_LEVEL_B; | ||
1193 | strcpy(name, "LVL-B"); | ||
1194 | for_each_online_cpu(cpu) { | ||
1195 | entry = &per_cpu(cpus, cpu); | ||
1196 | trace_litmus_container_param(++cont_id, &name); | ||
1197 | ce = &entry->crit_entries[level]; | ||
1198 | sched_trace_server_param(sid(ce), cont_id, 0, 0); | ||
1199 | } | ||
1200 | |||
1201 | level = CRIT_LEVEL_C; | ||
1202 | strcpy(name, "LVL-C"); | ||
1203 | trace_litmus_container_param(++cont_id, &name); | ||
1204 | for_each_online_cpu(cpu) { | ||
1205 | entry = &per_cpu(cpus, cpu); | ||
1206 | ce = &entry->crit_entries[level]; | ||
1207 | sched_trace_server_param(sid(ce), cont_id, 0, 0); | ||
1208 | } | ||
1209 | |||
1210 | |||
1211 | |||
1212 | } | ||
1213 | |||
/* LITMUS^RT scheduler-plugin operations table for the MC plugin. */
static struct sched_plugin mc_plugin __cacheline_aligned_in_smp = {
	.plugin_name = "MC",
	.task_new = mc_task_new,
	.complete_job = complete_job,
	.task_exit = mc_task_exit,
	.schedule = mc_schedule,
	.task_wake_up = mc_task_wake_up,
	.task_block = mc_task_block,
	.admit_task = mc_admit_task,
	.activate_plugin = mc_activate_plugin,
	.release_at = mc_release_at,
	.deactivate_plugin = mc_deactivate_plugin,
	.finish_switch = mc_finish_switch,
	.release_ts = mc_release_ts,
};
1229 | |||
1230 | static void init_crit_entry(struct crit_entry *ce, enum crit_level level, | ||
1231 | struct domain_data *dom_data, | ||
1232 | struct bheap_node *node) | ||
1233 | { | ||
1234 | ce->level = level; | ||
1235 | ce->linked = NULL; | ||
1236 | ce->node = node; | ||
1237 | ce->domain = &dom_data->domain; | ||
1238 | ce->state = CS_ACTIVE; | ||
1239 | #ifdef CONFIG_MERGE_TIMERS | ||
1240 | init_event(&ce->event, level, mc_ghost_exhausted, | ||
1241 | event_list_alloc(GFP_ATOMIC)); | ||
1242 | #else | ||
1243 | hrtimer_init(&ce->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | ||
1244 | ce->timer.function = mc_ghost_exhausted; | ||
1245 | #endif | ||
1246 | |||
1247 | } | ||
1248 | |||
1249 | static void init_local_domain(struct cpu_entry *entry, struct domain_data *dom_data, | ||
1250 | enum crit_level level) | ||
1251 | { | ||
1252 | dom_data->heap = NULL; | ||
1253 | dom_data->crit_entry = &entry->crit_entries[level]; | ||
1254 | init_crit_entry(dom_data->crit_entry, level, dom_data, NULL); | ||
1255 | } | ||
1256 | |||
/*
 * init_global_domain() - Initialize a global (shared) domain whose per-CPU
 * crit entries are tracked together in a single priority heap.
 */
static void init_global_domain(struct domain_data *dom_data, enum crit_level level,
			       struct bheap *heap, struct bheap_node *nodes)
{
	int cpu;
	struct cpu_entry *entry;
	struct crit_entry *ce;
	struct bheap_node *node;

	dom_data->crit_entry = NULL; /* global: no single owning entry */
	dom_data->heap = heap;
	bheap_init(heap);

	for_each_online_cpu(cpu) {
		entry = &per_cpu(cpus, cpu);
		node = &nodes[cpu];
		ce = &entry->crit_entries[level];
		init_crit_entry(ce, level, dom_data, node);
		/* NOTE(review): passes &ce->node (just set to node above) —
		 * presumably bheap_node_init takes a handle pointer; confirm. */
		bheap_node_init(&ce->node, ce);
		bheap_insert(cpu_lower_prio, heap, node);
	}
}
1278 | |||
/*
 * init_edf_domain() - Initialize an EDF-ordered rt_domain and wire its
 * release timers to the configured timer infrastructure: release-master
 * CPU and/or merged event groups, chosen by the #ifdef matrix below.
 */
static inline void init_edf_domain(struct domain *dom, rt_domain_t *rt,
				   enum crit_level prio, int is_partitioned, int cpu)
{
	pd_domain_init(dom, rt, edf_ready_order, NULL,
		       mc_release_jobs, mc_preempt_needed,
		       edf_higher_prio);
	rt->level = prio;
#if defined(CONFIG_PLUGIN_MC_RELEASE_MASTER) && defined(CONFIG_MERGE_TIMERS)
	/* All timers are on one CPU and release-master is using the event
	 * merging interface as well. */
	BUG_ON(NO_CPU == interrupt_cpu);
	rt->event_group = get_event_group_for(interrupt_cpu);
	rt->prio = prio;
#elif defined(CONFIG_PLUGIN_MC_RELEASE_MASTER) && !defined(CONFIG_MERGE_TIMERS)
	/* Using release master, but not merging timers. */
	rt->release_master = interrupt_cpu;
#elif !defined(CONFIG_PLUGIN_MC_RELEASE_MASTER) && defined(CONFIG_MERGE_TIMERS)
	/* Merge the timers, but don't move them to the release master. */
	if (is_partitioned) {
		rt->event_group = get_event_group_for(cpu);
	} else {
		/* Global timers will be added to the event groups that code is
		 * executing on when add_event() is called.
		 */
		rt->event_group = NULL;
	}
	rt->prio = prio;
#endif
}
1308 | |||
/* Forward declaration; the definition is not in this file's view. */
struct domain_data *ce_domain_for(int);
/*
 * init_mc() - Module init: build per-CPU state, the per-CPU level-A (CE)
 * and level-B (EDF) domains, and the single global level-C (EDF) domain,
 * then register the plugin with LITMUS.
 */
static int __init init_mc(void)
{
	int cpu;
	struct cpu_entry *entry;
	struct domain_data *dom_data;
	rt_domain_t *rt;
	raw_spinlock_t *a_dom_lock, *b_dom_lock, *c_dom_lock; /* For lock debugger */
	struct ce_dom_data *ce_data;

	for_each_online_cpu(cpu) {
		entry = &per_cpu(cpus, cpu);

		/* CPU */
		entry->cpu = cpu;
		entry->scheduled = NULL;
		entry->linked = NULL;

		raw_spin_lock_init(&entry->lock);

#ifdef CONFIG_PLUGIN_MC_REDIRECT
		raw_spin_lock_init(&entry->redir_lock);
		INIT_LIST_HEAD(&entry->redir);
#endif

		/* CRIT_LEVEL_A: partitioned cyclic-executive domain */
		dom_data = &per_cpu(_mc_crit_a, cpu);
		ce_data = &per_cpu(_mc_crit_a_ce_data, cpu);
		a_dom_lock = &per_cpu(_mc_crit_a_lock, cpu);
		raw_spin_lock_init(a_dom_lock);
		ce_domain_init(&dom_data->domain,
			       a_dom_lock, ce_requeue, ce_peek_and_take_ready,
			       ce_peek_and_take_ready, mc_preempt_needed,
			       ce_higher_prio, ce_data, cpu,
			       ce_timer_function);
		init_local_domain(entry, dom_data, CRIT_LEVEL_A);
		dom_data->domain.name = "LVL-A";

		/* CRIT_LEVEL_B: partitioned EDF domain */
		dom_data = &per_cpu(_mc_crit_b, cpu);
		rt = &per_cpu(_mc_crit_b_rt, cpu);
		init_local_domain(entry, dom_data, CRIT_LEVEL_B);
		init_edf_domain(&dom_data->domain, rt, CRIT_LEVEL_B, 1, cpu);
		b_dom_lock = dom_data->domain.lock;
		raw_spin_lock_init(b_dom_lock);
		dom_data->domain.name = "LVL-B";
	}

	/* CRIT_LEVEL_C: single global EDF domain shared by every CPU */
	init_global_domain(&_mc_crit_c, CRIT_LEVEL_C,
			   &_mc_heap_c, _mc_nodes_c);
	init_edf_domain(&_mc_crit_c.domain, &_mc_crit_c_rt, CRIT_LEVEL_C,
			0, NO_CPU);
	c_dom_lock = _mc_crit_c.domain.lock;
	raw_spin_lock_init(c_dom_lock);
	_mc_crit_c.domain.name = "LVL-C";

	return register_sched_plugin(&mc_plugin);
}

module_init(init_mc);
diff --git a/litmus/sched_mc_ce.c b/litmus/sched_mc_ce.c index 4808377b9bb7..702b46da93d5 100644 --- a/litmus/sched_mc_ce.c +++ b/litmus/sched_mc_ce.c | |||
@@ -139,7 +139,7 @@ static void mc_ce_job_completion(struct domain *dom, struct task_struct *ts) | |||
139 | 139 | ||
140 | TRACE_TASK(ts, "Completed\n"); | 140 | TRACE_TASK(ts, "Completed\n"); |
141 | 141 | ||
142 | sched_trace_task_completion(ts, 0); | 142 | /* sched_trace_task_completion(ts, 0); */ |
143 | /* post-increment is important here */ | 143 | /* post-increment is important here */ |
144 | just_finished = (tsk_rt(ts)->job_params.job_no)++; | 144 | just_finished = (tsk_rt(ts)->job_params.job_no)++; |
145 | 145 | ||
@@ -292,7 +292,7 @@ long mc_ce_admit_task_common(struct task_struct *ts) | |||
292 | printk(KERN_INFO "litmus: couldn't get pid struct for %d\n", | 292 | printk(KERN_INFO "litmus: couldn't get pid struct for %d\n", |
293 | ts->pid); | 293 | ts->pid); |
294 | goto out; | 294 | goto out; |
295 | n } | 295 | } |
296 | 296 | ||
297 | if (lvl_a_id >= pid_table->num_pid_entries) { | 297 | if (lvl_a_id >= pid_table->num_pid_entries) { |
298 | printk(KERN_INFO "litmus: level A id greater than expected " | 298 | printk(KERN_INFO "litmus: level A id greater than expected " |
@@ -499,7 +499,7 @@ lt_t mc_ce_timer_callback_common(struct domain *dom) | |||
499 | tsk_rt(should_schedule)->job_params.deadline - | 499 | tsk_rt(should_schedule)->job_params.deadline - |
500 | pid_entry->budget; | 500 | pid_entry->budget; |
501 | tsk_rt(should_schedule)->job_params.exec_time = 0; | 501 | tsk_rt(should_schedule)->job_params.exec_time = 0; |
502 | sched_trace_task_release(should_schedule); | 502 | /* sched_trace_task_release(should_schedule); */ |
503 | set_rt_flags(ce_data->should_schedule, RT_F_RUNNING); | 503 | set_rt_flags(ce_data->should_schedule, RT_F_RUNNING); |
504 | } | 504 | } |
505 | return next_timer_abs; | 505 | return next_timer_abs; |