author		Bjoern B. Brandenburg <bbb@cs.unc.edu>	2011-01-26 17:07:52 -0500
committer	Bjoern B. Brandenburg <bbb@cs.unc.edu>	2011-01-26 17:44:46 -0500
commit		00ffad8cfa533223121c8b400ae829ccef2ddfe8 (patch)
tree		38f89d691a0374c9df92ebb98ebdc697e2f18ec8
parent		97cb4dc4e806cd52ea1c415bcd081f08c49b776d (diff)
Add EDF-WM plugin
[semi-part backport]
-rw-r--r--	include/litmus/rt_param.h	53
-rw-r--r--	litmus/Makefile			3
-rw-r--r--	litmus/sched_edf_wm.c		785
3 files changed, 840 insertions(+), 1 deletion(-)
diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
index a7a183f34a80..9927b09e0a01 100644
--- a/include/litmus/rt_param.h
+++ b/include/litmus/rt_param.h
@@ -1,3 +1,5 @@
+#include <linux/threads.h>
+
 /*
  * Definition of the scheduler plugin interface.
  *
@@ -33,6 +35,36 @@ typedef enum {
 	PRECISE_ENFORCEMENT /* NOT IMPLEMENTED - enforced with hrtimers */
 } budget_policy_t;
 
+/* The parameters for the EDF-WM semi-partitioned scheduler.
+ * Each task may be split across multiple cpus. Each per-cpu allocation
+ * is called a 'slice'.
+ */
+#define MAX_EDF_WM_SLICES 24
+#define MIN_EDF_WM_SLICE_SIZE 50000 /* .05 millisecond = 50us */
+
+struct edf_wm_slice {
+	/* on which CPU is this slice allocated */
+	unsigned int cpu;
+	/* relative deadline from job release (not from slice release!) */
+	lt_t deadline;
+	/* budget of this slice; must be precisely enforced */
+	lt_t budget;
+	/* offset of this slice relative to the job release */
+	lt_t offset;
+};
+
+/* If a job is not sliced across multiple CPUs, then
+ * count is set to zero and none of the slices is used.
+ * This implies that count == 1 is illegal.
+ */
+struct edf_wm_params {
+	/* enumeration of all slices */
+	struct edf_wm_slice slices[MAX_EDF_WM_SLICES];
+
+	/* how many slices are defined? */
+	unsigned int count;
+};
+
 struct rt_task {
 	lt_t exec_cost;
 	lt_t period;
@@ -40,6 +72,12 @@ struct rt_task {
 	unsigned int cpu;
 	task_class_t cls;
 	budget_policy_t budget_policy; /* ignored by pfair */
+
+	/* parameters used by the semi-partitioned algorithms */
+	union {
+		/* EDF-WM; defined in sched_edf_wm.c */
+		struct edf_wm_params wm;
+	} semi_part;
 };
 
 /* The definition of the data that is shared between the kernel and real-time
@@ -184,6 +222,21 @@ struct rt_param {
 
 	/* Pointer to the page shared between userspace and kernel. */
 	struct control_page * ctrl_page;
+
+	/* runtime info for the semi-part plugins */
+	union {
+		/* EDF-WM runtime information */
+		struct {
+			/* at which exec time did the current slice start? */
+			lt_t exec_time;
+			/* when did the job suspend? */
+			lt_t suspend_time;
+			/* cached job parameters */
+			lt_t job_release, job_deadline;
+			/* pointer to the current slice */
+			struct edf_wm_slice* slice;
+		} wm;
+	} semi_part;
 };
 
 /* Possible RT flags */
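For context, a minimal usage sketch (not part of this patch): a task with a 3 ms execution cost and a 10 ms period could be split into two slices using the fields defined above. The struct, field, and constant names come from this diff; how the filled-in struct rt_task is handed to the kernel (the set-parameters path) is outside this change, and the concrete numbers are illustrative only.

/* hypothetical userspace-side sketch; assumes rt_param.h is on the include path */
#include <litmus/rt_param.h>

static void setup_split_task(struct rt_task *tp)
{
	tp->exec_cost     = 3000000;              /* 3 ms, in ns */
	tp->period        = 10000000;             /* 10 ms */
	tp->cpu           = 0;                    /* must equal slices[0].cpu */
	tp->budget_policy = PRECISE_ENFORCEMENT;  /* required for sliced tasks */

	tp->semi_part.wm.count = 2;               /* count == 1 is rejected */

	/* slice 0: CPU 0, window [0 ms, 5 ms) of the job, 1.5 ms budget */
	tp->semi_part.wm.slices[0].cpu      = 0;
	tp->semi_part.wm.slices[0].offset   = 0;
	tp->semi_part.wm.slices[0].deadline = 5000000;
	tp->semi_part.wm.slices[0].budget   = 1500000;

	/* slice 1: CPU 1, window [5 ms, 10 ms) of the job, 1.5 ms budget */
	tp->semi_part.wm.slices[1].cpu      = 1;
	tp->semi_part.wm.slices[1].offset   = 5000000;
	tp->semi_part.wm.slices[1].deadline = 10000000;
	tp->semi_part.wm.slices[1].budget   = 1500000;

	/* Invariants checked later by wm_check_params() in sched_edf_wm.c:
	 * budgets sum to exec_cost, slice 0's deadline does not exceed
	 * slice 1's offset, each budget fits in [offset, deadline] and is
	 * at least MIN_EDF_WM_SLICE_SIZE, and consecutive slices sit on
	 * different CPUs. */
}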
diff --git a/litmus/Makefile b/litmus/Makefile
index f301d2842e43..7fe37a59c425 100644
--- a/litmus/Makefile
+++ b/litmus/Makefile
@@ -14,7 +14,8 @@ obj-y = sched_plugin.o litmus.o \
 	bheap.o \
 	ctrldev.o \
 	sched_gsn_edf.o \
-	sched_psn_edf.o
+	sched_psn_edf.o \
+	sched_edf_wm.o
 
 obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o
 obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o
diff --git a/litmus/sched_edf_wm.c b/litmus/sched_edf_wm.c
new file mode 100644
index 000000000000..f4219ca812d3
--- /dev/null
+++ b/litmus/sched_edf_wm.c
@@ -0,0 +1,785 @@
+/* EDF-WM: based on PSN-EDF.
+ */
+
+#include <linux/percpu.h>
+#include <linux/sched.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+
+#include <linux/module.h>
+
+#include <litmus/litmus.h>
+#include <litmus/jobs.h>
+#include <litmus/sched_plugin.h>
+#include <litmus/edf_common.h>
+
+typedef struct {
+	rt_domain_t domain;
+	int cpu;
+	struct task_struct* scheduled; /* only RT tasks */
+
+	/* The enforcement timer is used to accurately police
+	 * slice budgets. */
+	struct hrtimer enforcement_timer;
+	int timer_armed;
+/*
+ * scheduling lock slock
+ * protects the domain and serializes scheduling decisions
+ */
+#define slock domain.ready_lock
+
+} wm_domain_t;
+
+DEFINE_PER_CPU(wm_domain_t, wm_domains);
+
+#define TRACE_DOM(dom, fmt, args...) \
+	TRACE("(wm_domains[%d]) " fmt, (dom)->cpu, ##args)
+
+
+#define local_domain         (&__get_cpu_var(wm_domains))
+#define remote_domain(cpu)   (&per_cpu(wm_domains, cpu))
+#define domain_of_task(task) (remote_domain(get_partition(task)))
+#define domain_from_timer(t) (container_of((t), wm_domain_t, enforcement_timer))
+
+static int is_sliced_task(struct task_struct* t)
+{
+	return tsk_rt(t)->task_params.semi_part.wm.count;
+}
+
+static struct edf_wm_slice* get_last_slice(struct task_struct* t)
+{
+	int idx = tsk_rt(t)->task_params.semi_part.wm.count - 1;
+	return tsk_rt(t)->task_params.semi_part.wm.slices + idx;
+}
+
+static void compute_slice_params(struct task_struct* t)
+{
+	struct rt_param* p = tsk_rt(t);
+	/* Here we do a little trick to make the generic EDF code
+	 * play well with job slices. We overwrite the job-level
+	 * release and deadline fields with the slice-specific values
+	 * so that we can enqueue this task in an EDF rt_domain_t
+	 * without issue. The actual values are cached in the semi_part.wm
+	 * structure. */
+	p->job_params.deadline = p->semi_part.wm.job_release +
+		p->semi_part.wm.slice->deadline;
+	p->job_params.release = p->semi_part.wm.job_release +
+		p->semi_part.wm.slice->offset;
+
+	/* Similarly, we play a trick on the cpu field. */
+	p->task_params.cpu = p->semi_part.wm.slice->cpu;
+
+	/* update the per-slice budget reference */
+	p->semi_part.wm.exec_time = p->job_params.exec_time;
+}
+
+static void complete_sliced_job(struct task_struct* t)
+{
+	struct rt_param* p = tsk_rt(t);
+
+	/* We need to undo our trickery to the
+	 * job parameters (see above). */
+	p->job_params.release = p->semi_part.wm.job_release;
+	p->job_params.deadline = p->semi_part.wm.job_deadline;
+
+	/* Ok, now let generic code do the actual work. */
+	prepare_for_next_period(t);
+
+	/* And finally cache the updated parameters. */
+	p->semi_part.wm.job_release = p->job_params.release;
+	p->semi_part.wm.job_deadline = p->job_params.deadline;
+}
+
+static lt_t slice_exec_time(struct task_struct* t)
+{
+	struct rt_param* p = tsk_rt(t);
+
+	/* Compute how much execution time has been consumed
+	 * since last slice advancement. */
+	return p->job_params.exec_time - p->semi_part.wm.exec_time;
+}
+
+static lt_t slice_budget(struct task_struct* t)
+{
+	return tsk_rt(t)->semi_part.wm.slice->budget;
+}
+
+static int slice_budget_exhausted(struct task_struct* t)
+{
+	return slice_exec_time(t) >= slice_budget(t);
+}
+
+/* assumes positive remainder; overflows otherwise */
+static lt_t slice_budget_remaining(struct task_struct* t)
+{
+	return slice_budget(t) - slice_exec_time(t);
+}
+
+static lt_t wm_budget_remaining(struct task_struct* t)
+{
+	if (is_sliced_task(t))
+		return slice_budget_remaining(t);
+	else
+		return budget_remaining(t);
+}
+
+static int wm_budget_exhausted(struct task_struct* t)
+{
+	if (is_sliced_task(t))
+		return slice_budget_exhausted(t);
+	else
+		return budget_exhausted(t);
+}
+
+static void advance_next_slice(struct task_struct* t, int completion_signaled)
+{
+	int idx;
+	struct rt_param* p = tsk_rt(t);
+
+	/* make sure this is actually a sliced job */
+	BUG_ON(!is_sliced_task(t));
+	BUG_ON(is_queued(t));
+
+	/* determine index of current slice */
+	idx = p->semi_part.wm.slice -
+		p->task_params.semi_part.wm.slices;
+
+	TRACE_TASK(t, "advancing slice %d; excess=%lluns; "
+		   "completion_signaled=%d.\n",
+		   idx, slice_exec_time(t) - slice_budget(t),
+		   completion_signaled);
+
+	if (completion_signaled)
+		idx = 0;
+	else
+		/* increment and wrap around, if necessary */
+		idx = (idx + 1) % p->task_params.semi_part.wm.count;
+
+	/* point to next slice */
+	p->semi_part.wm.slice =
+		p->task_params.semi_part.wm.slices + idx;
+
+	/* Check if we need to update essential job parameters. */
+	if (!idx) {
+		/* job completion */
+		sched_trace_task_completion(t, !completion_signaled);
+		TRACE_TASK(t, "completed sliced job "
+			   "(signaled:%d)\n", completion_signaled);
+		complete_sliced_job(t);
+	}
+
+	/* Update job parameters for new slice. */
+	compute_slice_params(t);
+}
+
+/* assumes time_passed does not advance past the last slice */
+static void fast_forward_slices(struct task_struct* t, lt_t time_passed)
+{
+	TRACE_TASK(t, "fast forwarding %lluns\n", time_passed);
+
+	/* this is NOT the slice version */
+	BUG_ON(budget_remaining(t) <= time_passed);
+
+	if (wm_budget_exhausted(t)) {
+		/* This can happen if a suspension raced
+		 * with a normal slice advancement. wm_schedule()
+		 * does not process out_of_time when a task blocks. */
+		TRACE_TASK(t, "block raced with out_of_time?\n");
+		advance_next_slice(t, 0);
+	}
+
+	while (time_passed &&
+	       time_passed >= slice_budget_remaining(t)) {
+		/* slice completely exhausted */
+		time_passed -= slice_budget_remaining(t);
+		tsk_rt(t)->job_params.exec_time +=
+			slice_budget_remaining(t);
+
+		BUG_ON(!slice_budget_exhausted(t));
+		BUG_ON(slice_budget_remaining(t) != 0);
+		BUG_ON(tsk_rt(t)->semi_part.wm.slice == get_last_slice(t));
+
+		advance_next_slice(t, 0);
+	}
+	/* add remainder to the consumed execution time */
+	tsk_rt(t)->job_params.exec_time += time_passed;
+}
+
+/* we assume the lock is being held */
+static void preempt(wm_domain_t *dom)
+{
+	TRACE_DOM(dom, "will be preempted.\n");
+	/* We pass NULL as the task since non-preemptive sections are not
+	 * supported in this plugin, so per-task checks are not needed. */
+	preempt_if_preemptable(NULL, dom->cpu);
+}
+
+static enum hrtimer_restart on_enforcement_timeout(struct hrtimer *timer)
+{
+	wm_domain_t *dom = domain_from_timer(timer);
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&dom->slock, flags);
+	if (likely(dom->timer_armed)) {
+		TRACE_DOM(dom, "enforcement timer fired.\n");
+		dom->timer_armed = 0;
+		preempt(dom);
+	} else
+		TRACE_DOM(dom, "timer fired but not armed???\n");
+	raw_spin_unlock_irqrestore(&dom->slock, flags);
+	return HRTIMER_NORESTART;
+}
+
+/* assumes called with IRQs off */
+static void cancel_enforcement_timer(wm_domain_t* dom)
+{
+	int ret;
+
+	TRACE_DOM(dom, "cancelling enforcement timer.\n");
+
+	/* Arming the timer only makes sense locally. */
+	BUG_ON(dom->cpu != raw_smp_processor_id());
+
+	/* Since interrupts are disabled and dom->timer_armed is only
+	 * modified locally, we do not need to lock this domain.
+	 */
+
+	if (dom->timer_armed) {
+		ret = hrtimer_try_to_cancel(&dom->enforcement_timer);
+		/* Should never be inactive. */
+		BUG_ON(ret == 0);
+		/* Should never be running concurrently. */
+		BUG_ON(ret == -1);
+
+		dom->timer_armed = 0;
+	}
+}
+
+/* assumes called with IRQs off */
+static void arm_enforcement_timer(wm_domain_t* dom, struct task_struct* t)
+{
+	lt_t when_to_fire;
+
+	TRACE_DOM(dom, "arming enforcement timer.\n");
+
+	/* The task has to belong to this domain. */
+	BUG_ON(domain_of_task(t) != dom);
+
+	/* Arming the timer only makes sense locally. */
+	BUG_ON(dom->cpu != raw_smp_processor_id());
+
+	/* Calling this when there is no budget left for the task
+	 * makes no sense. */
+	BUG_ON(wm_budget_exhausted(t));
+
+	/* __hrtimer_start_range_ns() cancels the timer
+	 * anyway, so we don't have to check whether it is still armed
+	 */
+
+	when_to_fire = litmus_clock() + wm_budget_remaining(t);
+	__hrtimer_start_range_ns(&dom->enforcement_timer,
+				 ns_to_ktime(when_to_fire),
+				 0 /* delta */,
+				 HRTIMER_MODE_ABS_PINNED,
+				 0 /* no wakeup */);
+
+	dom->timer_armed = 1;
+}
+
+static void wm_domain_init(wm_domain_t* dom,
+			   check_resched_needed_t check,
+			   release_jobs_t release,
+			   int cpu)
+{
+	edf_domain_init(&dom->domain, check, release);
+	dom->cpu = cpu;
+	dom->scheduled = NULL;
+	dom->timer_armed = 0;
+	hrtimer_init(&dom->enforcement_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	dom->enforcement_timer.function = on_enforcement_timeout;
+}
+
+static void wm_requeue_remote(struct task_struct *t)
+{
+	wm_domain_t *dom = domain_of_task(t);
+
+	set_rt_flags(t, RT_F_RUNNING);
+	if (is_released(t, litmus_clock()))
+		/* acquires necessary lock */
+		add_ready(&dom->domain, t);
+	else
+		/* force timer on remote CPU */
+		add_release_on(&dom->domain, t, get_partition(t));
+}
+
+static void wm_requeue_local(struct task_struct* t, rt_domain_t *edf)
+{
+	if (t->state != TASK_RUNNING)
+		TRACE_TASK(t, "requeue: !TASK_RUNNING\n");
+
+	set_rt_flags(t, RT_F_RUNNING);
+	if (is_released(t, litmus_clock()))
+		__add_ready(edf, t);
+	else
+		add_release(edf, t); /* it has got to wait */
+}
+
+static int wm_check_resched(rt_domain_t *edf)
+{
+	wm_domain_t *dom = container_of(edf, wm_domain_t, domain);
+
+	/* because this is a callback from rt_domain_t we already hold
+	 * the necessary lock for the ready queue
+	 */
+	if (edf_preemption_needed(edf, dom->scheduled)) {
+		preempt(dom);
+		return 1;
+	} else
+		return 0;
+}
+
+static void regular_job_completion(struct task_struct* t, int forced)
+{
+	sched_trace_task_completion(t, forced);
+	TRACE_TASK(t, "job_completion().\n");
+
+	set_rt_flags(t, RT_F_SLEEP);
+	prepare_for_next_period(t);
+}
+
+static void wm_job_or_slice_completion(struct task_struct* t,
+				       int completion_signaled)
+{
+	if (is_sliced_task(t))
+		advance_next_slice(t, completion_signaled);
+	else
+		regular_job_completion(t, !completion_signaled);
+}
+
+static void wm_tick(struct task_struct *t)
+{
+	wm_domain_t *dom = local_domain;
+
+	/* Check for inconsistency. We don't need the lock for this since
+	 * ->scheduled is only changed in schedule, which obviously is not
+	 * executing in parallel on this CPU
+	 */
+	BUG_ON(is_realtime(t) && t != dom->scheduled);
+
+	if (is_realtime(t) && budget_enforced(t) && wm_budget_exhausted(t)) {
+		set_tsk_need_resched(t);
+		TRACE_DOM(dom, "budget of %d exhausted in tick\n",
+			  t->pid);
+	}
+}
+
+static struct task_struct* wm_schedule(struct task_struct * prev)
+{
+	wm_domain_t *dom = local_domain;
+	rt_domain_t *edf = &dom->domain;
+	struct task_struct *next, *migrate = NULL;
+
+	int out_of_time, sleep, preempt, wrong_cpu, exists, blocks, resched;
+
+	raw_spin_lock(&dom->slock);
+
+	/* Sanity checking:
+	 * When a task exits (dead) dom->scheduled may be NULL
+	 * and prev _is_ realtime. */
+	BUG_ON(dom->scheduled && dom->scheduled != prev);
+	BUG_ON(dom->scheduled && !is_realtime(prev));
+
+	/* (0) Determine state */
+	exists      = dom->scheduled != NULL;
+	wrong_cpu   = exists && get_partition(dom->scheduled) != dom->cpu;
+	blocks      = exists && !is_running(dom->scheduled);
+	out_of_time = exists
+		&& budget_enforced(dom->scheduled)
+		&& wm_budget_exhausted(dom->scheduled);
+	sleep       = exists && get_rt_flags(dom->scheduled) == RT_F_SLEEP;
+	preempt     = edf_preemption_needed(edf, prev);
+
+	/* If we need to preempt do so.
+	 * The following checks set resched to 1 in case of special
+	 * circumstances.
+	 */
+	resched = preempt;
+
+
+	if (exists)
+		TRACE_TASK(prev,
+			   "blocks:%d out_of_time:%d sleep:%d preempt:%d "
+			   "wrong_cpu:%d state:%d sig:%d\n",
+			   blocks, out_of_time, sleep, preempt, wrong_cpu,
+			   prev->state, signal_pending(prev));
+
+	/* If a task blocks we have no choice but to reschedule.
+	 */
+	if (blocks)
+		resched = 1;
+
+	/* This can happen if a sliced task was moved to the next slice
+	 * by the wake_up() code path while still being scheduled.
+	 */
+	if (wrong_cpu)
+		resched = 1;
+
+	/* Any task that is preemptable and either exhausts its execution
+	 * budget or wants to sleep completes. We may have to reschedule after
+	 * this.
+	 */
+	if ((out_of_time || sleep) && !blocks) {
+		wm_job_or_slice_completion(dom->scheduled, sleep);
+		resched = 1;
+	}
+
+	/* The final scheduling decision. Do we need to switch for some reason?
+	 * Switch if we are in RT mode and have no task or if we need to
+	 * resched.
+	 */
+	next = NULL;
+	if (resched || !exists) {
+		if (dom->scheduled && !blocks) {
+			if (get_partition(dom->scheduled) == dom->cpu)
+				/* local task */
+				wm_requeue_local(dom->scheduled, edf);
+			else
+				/* not local anymore; wait until we drop the
+				 * ready queue lock */
+				migrate = dom->scheduled;
+		}
+		next = __take_ready(edf);
+	} else
+		/* Only override Linux scheduler if we have a real-time task
+		 * scheduled that needs to continue. */
+		if (exists)
+			next = prev;
+
+	if (next) {
+		TRACE_TASK(next, "scheduled at %llu (state:%d/%d)\n", litmus_clock(),
+			   next->state, is_running(next));
+		set_rt_flags(next, RT_F_RUNNING);
+	} else if (exists) {
+		TRACE("becoming idle at %llu\n", litmus_clock());
+	}
+
+	dom->scheduled = next;
+	raw_spin_unlock(&dom->slock);
+
+	/* check if we need to push the previous task onto another queue */
+	if (migrate) {
+		TRACE_TASK(migrate, "schedule-initiated migration to %d\n",
+			   get_partition(migrate));
+		wm_requeue_remote(migrate);
+	}
+
+	if (next && budget_precisely_enforced(next)) {
+		/* Make sure we call into the scheduler when this budget
+		 * expires. */
+		arm_enforcement_timer(dom, next);
+	} else if (dom->timer_armed) {
+		/* Make sure we don't cause unnecessary interrupts. */
+		cancel_enforcement_timer(dom);
+	}
+
+	return next;
+}
+
+
+/* Prepare a task for running in RT mode
+ */
+static void wm_task_new(struct task_struct * t, int on_rq, int running)
+{
+	wm_domain_t* dom = domain_of_task(t);
+	rt_domain_t* edf = &dom->domain;
+	unsigned long flags;
+
+	TRACE_TASK(t, "edf-wm: task new, cpu = %d\n",
+		   t->rt_param.task_params.cpu);
+
+	/* setup job parameters */
+	release_at(t, litmus_clock());
+
+	/* The task should be running in the queue, otherwise signal
+	 * code will try to wake it up with fatal consequences.
+	 */
+	raw_spin_lock_irqsave(&dom->slock, flags);
+
+	if (is_sliced_task(t)) {
+		/* make sure parameters are initialized consistently */
+		tsk_rt(t)->semi_part.wm.exec_time = 0;
+		tsk_rt(t)->semi_part.wm.job_release = get_release(t);
+		tsk_rt(t)->semi_part.wm.job_deadline = get_deadline(t);
+		tsk_rt(t)->semi_part.wm.slice = tsk_rt(t)->task_params.semi_part.wm.slices;
+		tsk_rt(t)->job_params.exec_time = 0;
+	}
+
+	if (running) {
+		/* there shouldn't be anything else running at the time */
+		BUG_ON(dom->scheduled);
+		dom->scheduled = t;
+	} else {
+		wm_requeue_local(t, edf);
+		/* maybe we have to reschedule */
+		preempt(dom);
+	}
+	raw_spin_unlock_irqrestore(&dom->slock, flags);
+}
+
+static void wm_release_at(struct task_struct *t, lt_t start)
+{
+	struct rt_param* p = tsk_rt(t);
+
+	if (is_sliced_task(t)) {
+		/* simulate wrapping to the first slice */
+		p->semi_part.wm.job_deadline = start;
+		p->semi_part.wm.slice = get_last_slice(t);
+		/* FIXME: creates bogus completion event... */
+		advance_next_slice(t, 0);
+		set_rt_flags(t, RT_F_RUNNING);
+	} else
+		/* generic code handles it */
+		release_at(t, start);
+}
+
+static lt_t wm_earliest_release(struct task_struct *t, lt_t now)
+{
+	lt_t deadline;
+	if (is_sliced_task(t))
+		deadline = tsk_rt(t)->semi_part.wm.job_deadline;
+	else
+		deadline = get_deadline(t);
+	if (lt_before(deadline, now))
+		return now;
+	else
+		return deadline;
+}
+
+static void wm_task_wake_up(struct task_struct *t)
+{
+	unsigned long flags;
+	wm_domain_t* dom = domain_of_task(t);
+	rt_domain_t* edf = &dom->domain;
+	struct rt_param* p = tsk_rt(t);
+	lt_t now, sleep_time;
+	int migrate = 0;
+
+	raw_spin_lock_irqsave(&dom->slock, flags);
+	BUG_ON(is_queued(t));
+
+	now = litmus_clock();
+
+	sleep_time = now - p->semi_part.wm.suspend_time;
+
+	TRACE_TASK(t, "wake_up at %llu after %llu, still-scheduled:%d\n",
+		   now, sleep_time, dom->scheduled == t);
+
+	/* account sleep time as execution time */
+	if (get_exec_time(t) + sleep_time >= get_exec_cost(t)) {
+		/* new sporadic release */
+		TRACE_TASK(t, "new sporadic release\n");
+		wm_release_at(t, wm_earliest_release(t, now));
+		sched_trace_task_release(t);
+	} else if (is_sliced_task(t)) {
+		/* figure out which slice we should be executing on */
+		fast_forward_slices(t, sleep_time);
+		/* can't be exhausted now */
+		BUG_ON(wm_budget_exhausted(t));
+	} else {
+		/* simply add to the execution time */
+		tsk_rt(t)->job_params.exec_time += sleep_time;
+	}
+
+
+	/* Only add to ready queue if it is not the currently-scheduled
+	 * task. This could be the case if a task was woken up concurrently
+	 * on a remote CPU before the executing CPU got around to actually
+	 * de-scheduling the task, i.e., wake_up() raced with schedule()
+	 * and won.
+	 */
+	if (dom->scheduled != t) {
+		if (get_partition(t) == dom->cpu)
+			wm_requeue_local(t, edf);
+		else
+			/* post-pone migration until after unlocking */
+			migrate = 1;
+	}
+
+	raw_spin_unlock_irqrestore(&dom->slock, flags);
+
+	if (migrate) {
+		TRACE_TASK(t, "wake_up-initiated migration to %d\n",
+			   get_partition(t));
+		wm_requeue_remote(t);
+	}
+
+	TRACE_TASK(t, "wake up done\n");
+}
+
+static void wm_task_block(struct task_struct *t)
+{
+	wm_domain_t* dom = domain_of_task(t);
+	unsigned long flags;
+	lt_t now = litmus_clock();
+
+	TRACE_TASK(t, "block at %llu, state=%d\n", now, t->state);
+
+	tsk_rt(t)->semi_part.wm.suspend_time = now;
+
+	raw_spin_lock_irqsave(&dom->slock, flags);
+	if (is_queued(t)) {
+		TRACE_TASK(t, "still queued; migration invariant failed?\n");
+		remove(&dom->domain, t);
+	}
+	raw_spin_unlock_irqrestore(&dom->slock, flags);
+
+	BUG_ON(!is_realtime(t));
+}
+
+static void wm_task_exit(struct task_struct * t)
+{
+	unsigned long flags;
+	wm_domain_t* dom = domain_of_task(t);
+	rt_domain_t* edf = &dom->domain;
+
+	raw_spin_lock_irqsave(&dom->slock, flags);
+	if (is_queued(t)) {
+		/* dequeue */
+		remove(edf, t);
+	}
+	if (dom->scheduled == t)
+		dom->scheduled = NULL;
+
+	TRACE_TASK(t, "RIP, now reschedule\n");
+
+	preempt(dom);
+	raw_spin_unlock_irqrestore(&dom->slock, flags);
+}
+
+static long wm_check_params(struct task_struct *t)
+{
+	struct rt_param* p = tsk_rt(t);
+	struct edf_wm_params* wm = &p->task_params.semi_part.wm;
+	int i;
+	lt_t tmp;
+
+	if (!is_sliced_task(t)) {
+		/* regular task; nothing to check */
+		TRACE_TASK(t, "accepted regular (non-sliced) task with "
+			   "%d slices\n",
+			   wm->count);
+		return 0;
+	}
+
+	/* (1) Either not sliced, or more than 1 slice. */
+	if (wm->count == 1 || wm->count > MAX_EDF_WM_SLICES) {
+		TRACE_TASK(t, "bad number of slices (%u) \n",
+			   wm->count);
+		return -EINVAL;
+	}
+
+	/* (2) The partition has to agree with the first slice. */
+	if (get_partition(t) != wm->slices[0].cpu) {
+		TRACE_TASK(t, "partition and first slice CPU differ "
+			   "(%d != %d)\n", get_partition(t), wm->slices[0].cpu);
+		return -EINVAL;
+	}
+
+	/* (3) The total budget must agree. */
+	for (i = 0, tmp = 0; i < wm->count; i++)
+		tmp += wm->slices[i].budget;
+	if (get_exec_cost(t) != tmp) {
+		TRACE_TASK(t, "total budget and sum of slice budgets differ\n");
+		return -EINVAL;
+	}
+
+	/* (4) The release of each slice must not precede the previous
+	 *     deadline. */
+	for (i = 0; i < wm->count - 1; i++)
+		if (wm->slices[i].deadline > wm->slices[i + 1].offset) {
+			TRACE_TASK(t, "slice %d overlaps with slice %d\n",
+				   i, i + 1);
+			return -EINVAL;
+		}
+
+	/* (5) The budget of each slice must fit within [offset, deadline] */
+	for (i = 0; i < wm->count; i++)
+		if (lt_before(wm->slices[i].deadline, wm->slices[i].offset) ||
+		    wm->slices[i].deadline - wm->slices[i].offset <
+		    wm->slices[i].budget) {
+			TRACE_TASK(t, "slice %d is overloaded\n", i);
+			return -EINVAL;
+		}
+
+	/* (6) The budget of each slice must exceed the minimum budget size. */
+	for (i = 0; i < wm->count; i++)
+		if (wm->slices[i].budget < MIN_EDF_WM_SLICE_SIZE) {
+			TRACE_TASK(t, "slice %d is too short\n", i);
+			return -EINVAL;
+		}
+
+	/* (7) The CPU of each slice must be different from the previous CPU. */
+	for (i = 0; i < wm->count - 1; i++)
+		if (wm->slices[i].cpu == wm->slices[i + 1].cpu) {
+			TRACE_TASK(t, "slice %d does not migrate\n", i);
+			return -EINVAL;
+		}
+
+	/* (8) The CPU of each slice must be online. */
+	for (i = 0; i < wm->count; i++)
+		if (!cpu_online(wm->slices[i].cpu)) {
+			TRACE_TASK(t, "slice %d is allocated on offline CPU\n",
+				   i);
+			return -EINVAL;
+		}
+
+	/* (9) A sliced task's budget must be precisely enforced. */
+	if (!budget_precisely_enforced(t)) {
+		TRACE_TASK(t, "budget is not precisely enforced "
+			   "(policy: %d).\n",
+			   tsk_rt(t)->task_params.budget_policy);
+		return -EINVAL;
+	}
+
+	TRACE_TASK(t, "accepted sliced task with %d slices\n",
+		   wm->count);
+
+	return 0;
+}
+
+static long wm_admit_task(struct task_struct* t)
+{
+	return task_cpu(t) == get_partition(t) ? wm_check_params(t) : -EINVAL;
+}
+
+/* Plugin object */
+static struct sched_plugin edf_wm_plugin __cacheline_aligned_in_smp = {
+	.plugin_name		= "EDF-WM",
+	.tick			= wm_tick,
+	.task_new		= wm_task_new,
+	.complete_job		= complete_job,
+	.task_exit		= wm_task_exit,
+	.schedule		= wm_schedule,
+	.release_at		= wm_release_at,
+	.task_wake_up		= wm_task_wake_up,
+	.task_block		= wm_task_block,
+	.admit_task		= wm_admit_task
+};
+
+
+static int __init init_edf_wm(void)
+{
+	int i;
+
+	/* FIXME: breaks with CPU hotplug
+	 */
+	for (i = 0; i < num_online_cpus(); i++) {
+		wm_domain_init(remote_domain(i),
+			       wm_check_resched,
+			       NULL, i);
+	}
+	return register_sched_plugin(&edf_wm_plugin);
+}
+
+module_init(init_edf_wm);
+
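To recap how the plugin turns the slice parameters into per-partition EDF jobs, here is a small sketch (not part of the patch; effective_slice_window() is a hypothetical helper that merely mirrors the arithmetic of compute_slice_params() above, and the timeline numbers are illustrative):

/* assumes rt_param.h is on the include path */
#include <litmus/rt_param.h>

static void effective_slice_window(const struct edf_wm_slice *s,
				   lt_t job_release,
				   lt_t *slice_release, lt_t *slice_deadline)
{
	*slice_release  = job_release + s->offset;   /* when the slice becomes eligible */
	*slice_deadline = job_release + s->deadline; /* absolute deadline of the slice  */
}

/* With the two-slice example shown after the rt_param.h hunk and a job
 * released at t = 100 ms, this yields slice 0 in [100 ms, 105 ms) on CPU 0
 * and slice 1 in [105 ms, 110 ms) on CPU 1, while the job-level release and
 * deadline stay cached in semi_part.wm.job_release / job_deadline. */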