author     Bjoern B. Brandenburg <bbb@cs.unc.edu>   2011-01-26 17:07:52 -0500
committer  Bjoern B. Brandenburg <bbb@cs.unc.edu>   2011-01-26 17:44:46 -0500
commit     00ffad8cfa533223121c8b400ae829ccef2ddfe8 (patch)
tree       38f89d691a0374c9df92ebb98ebdc697e2f18ec8
parent     97cb4dc4e806cd52ea1c415bcd081f08c49b776d (diff)
Add EDF-WM plugin
[semi-part backport]
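
EDF-WM is a semi-partitioned scheduler based on PSN-EDF: a task may
be split across multiple CPUs into per-CPU 'slices', each with its
own budget and with a release offset and deadline relative to the job
release. Slice budgets are policed precisely by a per-CPU enforcement
timer; when a slice exhausts its budget, the job migrates to the CPU
of the next slice.

For illustration, a hypothetical userspace sketch (not part of this
patch) that splits a task with a 3 ms budget and a 10 ms period
across two CPUs such that wm_check_params() accepts it:

    #include <string.h>
    #include <litmus/rt_param.h>

    static void setup_sliced_task(struct rt_task *p)
    {
            memset(p, 0, sizeof(*p));
            p->exec_cost     = 3000000;             /* 3 ms, in ns */
            p->period        = 10000000;            /* 10 ms */
            p->cpu           = 0;                   /* must equal slices[0].cpu */
            p->budget_policy = PRECISE_ENFORCEMENT; /* required for sliced tasks */

            p->semi_part.wm.count = 2;              /* count == 1 is rejected */

            /* the first 2 ms of each job run on CPU 0 in [0, 5 ms)... */
            p->semi_part.wm.slices[0] = (struct edf_wm_slice) {
                    .cpu = 0, .offset = 0, .deadline = 5000000, .budget = 2000000
            };
            /* ...the remaining 1 ms runs on CPU 1 in [5 ms, 10 ms) */
            p->semi_part.wm.slices[1] = (struct edf_wm_slice) {
                    .cpu = 1, .offset = 5000000, .deadline = 10000000, .budget = 1000000
            };
    }

The slice budgets sum to exec_cost, the slice windows do not overlap,
and each budget clears MIN_EDF_WM_SLICE_SIZE, so all nine admission
checks in wm_check_params() pass; the filled-in struct rt_task would
then be handed to the kernel in the usual way (e.g., via a
liblitmus-style set_rt_task_param() wrapper).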
-rw-r--r--   include/litmus/rt_param.h |  53
-rw-r--r--   litmus/Makefile           |   3
-rw-r--r--   litmus/sched_edf_wm.c     | 785
3 files changed, 840 insertions(+), 1 deletion(-)
diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
index a7a183f34a80..9927b09e0a01 100644
--- a/include/litmus/rt_param.h
+++ b/include/litmus/rt_param.h
@@ -1,3 +1,5 @@
+#include <linux/threads.h>
+
 /*
  * Definition of the scheduler plugin interface.
  *
@@ -33,6 +35,36 @@ typedef enum {
         PRECISE_ENFORCEMENT /* NOT IMPLEMENTED - enforced with hrtimers */
 } budget_policy_t;
 
+/* The parameters for the EDF-WM semi-partitioned scheduler.
+ * Each task may be split across multiple cpus. Each per-cpu allocation
+ * is called a 'slice'.
+ */
+#define MAX_EDF_WM_SLICES 24
+#define MIN_EDF_WM_SLICE_SIZE 50000 /* .05 millisecond = 50us */
+
+struct edf_wm_slice {
+        /* on which CPU is this slice allocated */
+        unsigned int cpu;
+        /* relative deadline from job release (not from slice release!) */
+        lt_t deadline;
+        /* budget of this slice; must be precisely enforced */
+        lt_t budget;
+        /* offset of this slice relative to the job release */
+        lt_t offset;
+};
+
+/* If a job is not sliced across multiple CPUs, then
+ * count is set to zero and none of the slices is used.
+ * This implies that count == 1 is illegal.
+ */
+struct edf_wm_params {
+        /* enumeration of all slices */
+        struct edf_wm_slice slices[MAX_EDF_WM_SLICES];
+
+        /* how many slices are defined? */
+        unsigned int count;
+};
+
 struct rt_task {
         lt_t exec_cost;
         lt_t period;
@@ -40,6 +72,12 @@ struct rt_task {
         unsigned int cpu;
         task_class_t cls;
         budget_policy_t budget_policy; /* ignored by pfair */
+
+        /* parameters used by the semi-partitioned algorithms */
+        union {
+                /* EDF-WM; defined in sched_edf_wm.c */
+                struct edf_wm_params wm;
+        } semi_part;
 };
 
 /* The definition of the data that is shared between the kernel and real-time
@@ -184,6 +222,21 @@ struct rt_param {
 
         /* Pointer to the page shared between userspace and kernel. */
         struct control_page * ctrl_page;
+
+        /* runtime info for the semi-part plugins */
+        union {
+                /* EDF-WM runtime information */
+                struct {
+                        /* at which exec time did the current slice start? */
+                        lt_t exec_time;
+                        /* when did the job suspend? */
+                        lt_t suspend_time;
+                        /* cached job parameters */
+                        lt_t job_release, job_deadline;
+                        /* pointer to the current slice */
+                        struct edf_wm_slice* slice;
+                } wm;
+        } semi_part;
 };
 
 /* Possible RT flags */
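
Note the semantics encoded above: a slice's offset and deadline are
both relative to the job release, not to the preceding slice. A
detached sketch of the mapping, mirroring what compute_slice_params()
in the new plugin below does (the header include is assumed; lt_t
values are in nanoseconds):

    #include <litmus/rt_param.h> /* struct edf_wm_slice, lt_t */

    /* Effective EDF window of slice s for a job released at job_release. */
    static void slice_window(lt_t job_release, const struct edf_wm_slice *s,
                             lt_t *release, lt_t *deadline)
    {
            *release  = job_release + s->offset;   /* slice becomes eligible */
            *deadline = job_release + s->deadline; /* absolute EDF deadline */
    }

With the two-slice example from the commit message, a job released at
time r runs in [r, r + 5 ms] on CPU 0 and in [r + 5 ms, r + 10 ms] on
CPU 1; wm_check_params() rejects slice sets whose windows overlap or
are too small to contain their budget.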
diff --git a/litmus/Makefile b/litmus/Makefile
index f301d2842e43..7fe37a59c425 100644
--- a/litmus/Makefile
+++ b/litmus/Makefile
@@ -14,7 +14,8 @@ obj-y = sched_plugin.o litmus.o \
         bheap.o \
         ctrldev.o \
         sched_gsn_edf.o \
-        sched_psn_edf.o
+        sched_psn_edf.o \
+        sched_edf_wm.o
 
 obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o
 obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o
diff --git a/litmus/sched_edf_wm.c b/litmus/sched_edf_wm.c
new file mode 100644
index 000000000000..f4219ca812d3
--- /dev/null
+++ b/litmus/sched_edf_wm.c
@@ -0,0 +1,785 @@
+/* EDF-WM: based on PSN-EDF.
+ */
+
+#include <linux/percpu.h>
+#include <linux/sched.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+
+#include <linux/module.h>
+
+#include <litmus/litmus.h>
+#include <litmus/jobs.h>
+#include <litmus/sched_plugin.h>
+#include <litmus/edf_common.h>
+
+typedef struct {
+        rt_domain_t domain;
+        int cpu;
+        struct task_struct* scheduled; /* only RT tasks */
+
+        /* The enforcement timer is used to accurately police
+         * slice budgets. */
+        struct hrtimer enforcement_timer;
+        int timer_armed;
+        /*
+         * scheduling lock slock
+         * protects the domain and serializes scheduling decisions
+         */
+#define slock domain.ready_lock
+
+} wm_domain_t;
+
+DEFINE_PER_CPU(wm_domain_t, wm_domains);
+
+#define TRACE_DOM(dom, fmt, args...) \
+        TRACE("(wm_domains[%d]) " fmt, (dom)->cpu, ##args)
+
+
+#define local_domain         (&__get_cpu_var(wm_domains))
+#define remote_domain(cpu)   (&per_cpu(wm_domains, cpu))
+#define domain_of_task(task) (remote_domain(get_partition(task)))
+#define domain_from_timer(t) (container_of((t), wm_domain_t, enforcement_timer))
+
+static int is_sliced_task(struct task_struct* t)
+{
+        return tsk_rt(t)->task_params.semi_part.wm.count;
+}
+
+static struct edf_wm_slice* get_last_slice(struct task_struct* t)
+{
+        int idx = tsk_rt(t)->task_params.semi_part.wm.count - 1;
+        return tsk_rt(t)->task_params.semi_part.wm.slices + idx;
+}
+
+static void compute_slice_params(struct task_struct* t)
+{
+        struct rt_param* p = tsk_rt(t);
+        /* Here we do a little trick to make the generic EDF code
+         * play well with job slices. We overwrite the job-level
+         * release and deadline fields with the slice-specific values
+         * so that we can enqueue this task in an EDF rt_domain_t
+         * without issue. The actual values are cached in the semi_part.wm
+         * structure. */
+        p->job_params.deadline = p->semi_part.wm.job_release +
+                p->semi_part.wm.slice->deadline;
+        p->job_params.release = p->semi_part.wm.job_release +
+                p->semi_part.wm.slice->offset;
+
+        /* Similarly, we play a trick on the cpu field. */
+        p->task_params.cpu = p->semi_part.wm.slice->cpu;
+
+        /* update the per-slice budget reference */
+        p->semi_part.wm.exec_time = p->job_params.exec_time;
+}
+
+static void complete_sliced_job(struct task_struct* t)
+{
+        struct rt_param* p = tsk_rt(t);
+
+        /* We need to undo our trickery to the
+         * job parameters (see above). */
+        p->job_params.release = p->semi_part.wm.job_release;
+        p->job_params.deadline = p->semi_part.wm.job_deadline;
+
+        /* Ok, now let generic code do the actual work. */
+        prepare_for_next_period(t);
+
+        /* And finally cache the updated parameters. */
+        p->semi_part.wm.job_release = p->job_params.release;
+        p->semi_part.wm.job_deadline = p->job_params.deadline;
+}
+
+static lt_t slice_exec_time(struct task_struct* t)
+{
+        struct rt_param* p = tsk_rt(t);
+
+        /* Compute how much execution time has been consumed
+         * since last slice advancement. */
+        return p->job_params.exec_time - p->semi_part.wm.exec_time;
+}
+
+static lt_t slice_budget(struct task_struct* t)
+{
+        return tsk_rt(t)->semi_part.wm.slice->budget;
+}
+
+static int slice_budget_exhausted(struct task_struct* t)
+{
+        return slice_exec_time(t) >= slice_budget(t);
+}
+
+/* assumes positive remainder; overflows otherwise */
+static lt_t slice_budget_remaining(struct task_struct* t)
+{
+        return slice_budget(t) - slice_exec_time(t);
+}
+
+static lt_t wm_budget_remaining(struct task_struct* t)
+{
+        if (is_sliced_task(t))
+                return slice_budget_remaining(t);
+        else
+                return budget_remaining(t);
+}
+
+static int wm_budget_exhausted(struct task_struct* t)
+{
+        if (is_sliced_task(t))
+                return slice_budget_exhausted(t);
+        else
+                return budget_exhausted(t);
+}
+
+static void advance_next_slice(struct task_struct* t, int completion_signaled)
+{
+        int idx;
+        struct rt_param* p = tsk_rt(t);
+
+        /* make sure this is actually a sliced job */
+        BUG_ON(!is_sliced_task(t));
+        BUG_ON(is_queued(t));
+
+        /* determine index of current slice */
+        idx = p->semi_part.wm.slice -
+                p->task_params.semi_part.wm.slices;
+
+        TRACE_TASK(t, "advancing slice %d; excess=%lluns; "
+                   "completion_signaled=%d.\n",
+                   idx, slice_exec_time(t) - slice_budget(t),
+                   completion_signaled);
+
+        if (completion_signaled)
+                idx = 0;
+        else
+                /* increment and wrap around, if necessary */
+                idx = (idx + 1) % p->task_params.semi_part.wm.count;
+
+        /* point to next slice */
+        p->semi_part.wm.slice =
+                p->task_params.semi_part.wm.slices + idx;
+
+        /* Check if we need to update essential job parameters. */
+        if (!idx) {
+                /* job completion */
+                sched_trace_task_completion(t, !completion_signaled);
+                TRACE_TASK(t, "completed sliced job "
+                           "(signaled:%d)\n", completion_signaled);
+                complete_sliced_job(t);
+        }
+
+        /* Update job parameters for new slice. */
+        compute_slice_params(t);
+}
+
+/* assumes time_passed does not advance past the last slice */
+static void fast_forward_slices(struct task_struct* t, lt_t time_passed)
+{
+        TRACE_TASK(t, "fast forwarding %lluns\n", time_passed);
+
+        /* this is NOT the slice version */
+        BUG_ON(budget_remaining(t) <= time_passed);
+
+        if (wm_budget_exhausted(t)) {
+                /* This can happen if a suspension raced
+                 * with a normal slice advancement. wm_schedule()
+                 * does not process out_of_time when a task blocks. */
+                TRACE_TASK(t, "block raced with out_of_time?\n");
+                advance_next_slice(t, 0);
+        }
+
+        while (time_passed &&
+               time_passed >= slice_budget_remaining(t)) {
+                /* slice completely exhausted */
+                time_passed -= slice_budget_remaining(t);
+                tsk_rt(t)->job_params.exec_time +=
+                        slice_budget_remaining(t);
+
+                BUG_ON(!slice_budget_exhausted(t));
+                BUG_ON(slice_budget_remaining(t) != 0);
+                BUG_ON(tsk_rt(t)->semi_part.wm.slice == get_last_slice(t));
+
+                advance_next_slice(t, 0);
+        }
+        /* add remainder to exec cost */
+        tsk_rt(t)->job_params.exec_time += time_passed;
+}
+
+/* we assume the lock is being held */
+static void preempt(wm_domain_t *dom)
+{
+        TRACE_DOM(dom, "will be preempted.\n");
+        /* We pass NULL as the task since non-preemptive sections are not
+         * supported in this plugin, so per-task checks are not needed. */
+        preempt_if_preemptable(NULL, dom->cpu);
+}
+
+static enum hrtimer_restart on_enforcement_timeout(struct hrtimer *timer)
+{
+        wm_domain_t *dom = domain_from_timer(timer);
+        unsigned long flags;
+
+        raw_spin_lock_irqsave(&dom->slock, flags);
+        if (likely(dom->timer_armed)) {
+                TRACE_DOM(dom, "enforcement timer fired.\n");
+                dom->timer_armed = 0;
+                preempt(dom);
+        } else
+                TRACE_DOM(dom, "timer fired but not armed???\n");
+        raw_spin_unlock_irqrestore(&dom->slock, flags);
+        return HRTIMER_NORESTART;
+}
+
+/* assumes called with IRQs off */
+static void cancel_enforcement_timer(wm_domain_t* dom)
+{
+        int ret;
+
+        TRACE_DOM(dom, "cancelling enforcement timer.\n");
+
+        /* Cancelling the timer only makes sense locally. */
+        BUG_ON(dom->cpu != raw_smp_processor_id());
+
+        /* Since interrupts are disabled and dom->timer_armed is only
+         * modified locally, we do not need to lock this domain.
+         */
+
+        if (dom->timer_armed) {
+                ret = hrtimer_try_to_cancel(&dom->enforcement_timer);
+                /* Should never be inactive. */
+                BUG_ON(ret == 0);
+                /* Should never be running concurrently. */
+                BUG_ON(ret == -1);
+
+                dom->timer_armed = 0;
+        }
+}
+
+/* assumes called with IRQs off */
+static void arm_enforcement_timer(wm_domain_t* dom, struct task_struct* t)
+{
+        lt_t when_to_fire;
+
+        TRACE_DOM(dom, "arming enforcement timer.\n");
+
+        /* The task has to belong to this domain. */
+        BUG_ON(domain_of_task(t) != dom);
+
+        /* Arming the timer only makes sense locally. */
+        BUG_ON(dom->cpu != raw_smp_processor_id());
+
+        /* Calling this when there is no budget left for the task
+         * makes no sense. */
+        BUG_ON(wm_budget_exhausted(t));
+
+        /* __hrtimer_start_range_ns() cancels the timer
+         * anyway, so we don't have to check whether it is still armed
+         */
+
+        when_to_fire = litmus_clock() + wm_budget_remaining(t);
+        __hrtimer_start_range_ns(&dom->enforcement_timer,
+                                 ns_to_ktime(when_to_fire),
+                                 0 /* delta */,
+                                 HRTIMER_MODE_ABS_PINNED,
+                                 0 /* no wakeup */);
+
+        dom->timer_armed = 1;
+}
+
+static void wm_domain_init(wm_domain_t* dom,
+                           check_resched_needed_t check,
+                           release_jobs_t release,
+                           int cpu)
+{
+        edf_domain_init(&dom->domain, check, release);
+        dom->cpu = cpu;
+        dom->scheduled = NULL;
+        dom->timer_armed = 0;
+        hrtimer_init(&dom->enforcement_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+        dom->enforcement_timer.function = on_enforcement_timeout;
+}
+
+static void wm_requeue_remote(struct task_struct *t)
+{
+        wm_domain_t *dom = domain_of_task(t);
+
+        set_rt_flags(t, RT_F_RUNNING);
+        if (is_released(t, litmus_clock()))
+                /* acquires necessary lock */
+                add_ready(&dom->domain, t);
+        else
+                /* force timer on remote CPU */
+                add_release_on(&dom->domain, t, get_partition(t));
+}
+
+static void wm_requeue_local(struct task_struct* t, rt_domain_t *edf)
+{
+        if (t->state != TASK_RUNNING)
+                TRACE_TASK(t, "requeue: !TASK_RUNNING\n");
+
+        set_rt_flags(t, RT_F_RUNNING);
+        if (is_released(t, litmus_clock()))
+                __add_ready(edf, t);
+        else
+                add_release(edf, t); /* it has got to wait */
+}
+
+static int wm_check_resched(rt_domain_t *edf)
+{
+        wm_domain_t *dom = container_of(edf, wm_domain_t, domain);
+
+        /* because this is a callback from rt_domain_t we already hold
+         * the necessary lock for the ready queue
+         */
+        if (edf_preemption_needed(edf, dom->scheduled)) {
+                preempt(dom);
+                return 1;
+        } else
+                return 0;
+}
+
+static void regular_job_completion(struct task_struct* t, int forced)
+{
+        sched_trace_task_completion(t, forced);
+        TRACE_TASK(t, "job_completion().\n");
+
+        set_rt_flags(t, RT_F_SLEEP);
+        prepare_for_next_period(t);
+}
+
+static void wm_job_or_slice_completion(struct task_struct* t,
+                                       int completion_signaled)
+{
+        if (is_sliced_task(t))
+                advance_next_slice(t, completion_signaled);
+        else
+                regular_job_completion(t, !completion_signaled);
+}
+
+static void wm_tick(struct task_struct *t)
+{
+        wm_domain_t *dom = local_domain;
+
+        /* Check for inconsistency. We don't need the lock for this since
+         * ->scheduled is only changed in schedule, which obviously is not
+         * executing in parallel on this CPU
+         */
+        BUG_ON(is_realtime(t) && t != dom->scheduled);
+
+        if (is_realtime(t) && budget_enforced(t) && wm_budget_exhausted(t)) {
+                set_tsk_need_resched(t);
+                TRACE_DOM(dom, "budget of %d exhausted in tick\n",
+                          t->pid);
+        }
+}
+
+static struct task_struct* wm_schedule(struct task_struct * prev)
+{
+        wm_domain_t *dom = local_domain;
+        rt_domain_t *edf = &dom->domain;
+        struct task_struct *next, *migrate = NULL;
+
+        int out_of_time, sleep, preempt, wrong_cpu, exists, blocks, resched;
+
+        raw_spin_lock(&dom->slock);
+
+        /* Sanity checking:
+         * When a task exits (dead) dom->scheduled may be NULL
+         * and prev _is_ realtime. */
+        BUG_ON(dom->scheduled && dom->scheduled != prev);
+        BUG_ON(dom->scheduled && !is_realtime(prev));
+
+        /* (0) Determine state */
+        exists = dom->scheduled != NULL;
+        wrong_cpu = exists && get_partition(dom->scheduled) != dom->cpu;
+        blocks = exists && !is_running(dom->scheduled);
+        out_of_time = exists
+                && budget_enforced(dom->scheduled)
+                && wm_budget_exhausted(dom->scheduled);
+        sleep = exists && get_rt_flags(dom->scheduled) == RT_F_SLEEP;
+        preempt = edf_preemption_needed(edf, prev);
+
+        /* If we need to preempt do so.
+         * The following checks set resched to 1 in case of special
+         * circumstances.
+         */
+        resched = preempt;
+
+
+        if (exists)
+                TRACE_TASK(prev,
+                           "blocks:%d out_of_time:%d sleep:%d preempt:%d "
+                           "wrong_cpu:%d state:%d sig:%d\n",
+                           blocks, out_of_time, sleep, preempt, wrong_cpu,
+                           prev->state, signal_pending(prev));
+
+        /* If a task blocks we have no choice but to reschedule.
+         */
+        if (blocks)
+                resched = 1;
+
+        /* This can happen if a sliced task was moved to the next slice
+         * by the wake_up() code path while still being scheduled.
+         */
+        if (wrong_cpu)
+                resched = 1;
+
+        /* Any task that is preemptable and either exhausts its execution
+         * budget or wants to sleep completes. We may have to reschedule after
+         * this.
+         */
+        if ((out_of_time || sleep) && !blocks) {
+                wm_job_or_slice_completion(dom->scheduled, sleep);
+                resched = 1;
+        }
+
+        /* The final scheduling decision. Do we need to switch for some reason?
+         * Switch if we are in RT mode and have no task or if we need to
+         * resched.
+         */
+        next = NULL;
+        if (resched || !exists) {
+                if (dom->scheduled && !blocks) {
+                        if (get_partition(dom->scheduled) == dom->cpu)
+                                /* local task */
+                                wm_requeue_local(dom->scheduled, edf);
+                        else
+                                /* not local anymore; wait until we drop the
+                                 * ready queue lock */
+                                migrate = dom->scheduled;
+                }
+                next = __take_ready(edf);
+        } else
+                /* Only override Linux scheduler if we have a real-time task
+                 * scheduled that needs to continue. */
+                if (exists)
+                        next = prev;
+
+        if (next) {
+                TRACE_TASK(next, "scheduled at %llu (state:%d/%d)\n",
+                           litmus_clock(), next->state, is_running(next));
+                set_rt_flags(next, RT_F_RUNNING);
+        } else if (exists) {
+                TRACE("becoming idle at %llu\n", litmus_clock());
+        }
+
+        dom->scheduled = next;
+        raw_spin_unlock(&dom->slock);
+
+        /* check if we need to push the previous task onto another queue */
+        if (migrate) {
+                TRACE_TASK(migrate, "schedule-initiated migration to %d\n",
+                           get_partition(migrate));
+                wm_requeue_remote(migrate);
+        }
+
+        if (next && budget_precisely_enforced(next)) {
+                /* Make sure we call into the scheduler when this budget
+                 * expires. */
+                arm_enforcement_timer(dom, next);
+        } else if (dom->timer_armed) {
+                /* Make sure we don't cause unnecessary interrupts. */
+                cancel_enforcement_timer(dom);
+        }
+
+        return next;
+}
+
+
+/* Prepare a task for running in RT mode
+ */
+static void wm_task_new(struct task_struct * t, int on_rq, int running)
+{
+        wm_domain_t* dom = domain_of_task(t);
+        rt_domain_t* edf = &dom->domain;
+        unsigned long flags;
+
+        TRACE_TASK(t, "edf-wm: task new, cpu = %d\n",
+                   t->rt_param.task_params.cpu);
+
+        /* setup job parameters */
+        release_at(t, litmus_clock());
+
+        /* The task should be running in the queue, otherwise signal
+         * code will try to wake it up with fatal consequences.
+         */
+        raw_spin_lock_irqsave(&dom->slock, flags);
+
+        if (is_sliced_task(t)) {
+                /* make sure parameters are initialized consistently */
+                tsk_rt(t)->semi_part.wm.exec_time = 0;
+                tsk_rt(t)->semi_part.wm.job_release = get_release(t);
+                tsk_rt(t)->semi_part.wm.job_deadline = get_deadline(t);
+                tsk_rt(t)->semi_part.wm.slice = tsk_rt(t)->task_params.semi_part.wm.slices;
+                tsk_rt(t)->job_params.exec_time = 0;
+        }
+
+        if (running) {
+                /* there shouldn't be anything else running at the time */
+                BUG_ON(dom->scheduled);
+                dom->scheduled = t;
+        } else {
+                wm_requeue_local(t, edf);
+                /* maybe we have to reschedule */
+                preempt(dom);
+        }
+        raw_spin_unlock_irqrestore(&dom->slock, flags);
+}
+
+static void wm_release_at(struct task_struct *t, lt_t start)
+{
+        struct rt_param* p = tsk_rt(t);
+
+        if (is_sliced_task(t)) {
+                /* simulate wrapping to the first slice */
+                p->semi_part.wm.job_deadline = start;
+                p->semi_part.wm.slice = get_last_slice(t);
+                /* FIXME: creates bogus completion event... */
+                advance_next_slice(t, 0);
+                set_rt_flags(t, RT_F_RUNNING);
+        } else
+                /* generic code handles it */
+                release_at(t, start);
+}
+
+static lt_t wm_earliest_release(struct task_struct *t, lt_t now)
+{
+        lt_t deadline;
+        if (is_sliced_task(t))
+                deadline = tsk_rt(t)->semi_part.wm.job_deadline;
+        else
+                deadline = get_deadline(t);
+        if (lt_before(deadline, now))
+                return now;
+        else
+                return deadline;
+}
+
+static void wm_task_wake_up(struct task_struct *t)
+{
+        unsigned long flags;
+        wm_domain_t* dom = domain_of_task(t);
+        rt_domain_t* edf = &dom->domain;
+        struct rt_param* p = tsk_rt(t);
+        lt_t now, sleep_time;
+        int migrate = 0;
+
+        raw_spin_lock_irqsave(&dom->slock, flags);
+        BUG_ON(is_queued(t));
+
+        now = litmus_clock();
+
+        sleep_time = now - p->semi_part.wm.suspend_time;
+
+        TRACE_TASK(t, "wake_up at %llu after %llu, still-scheduled:%d\n",
+                   now, sleep_time, dom->scheduled == t);
+
+        /* account sleep time as execution time */
+        if (get_exec_time(t) + sleep_time >= get_exec_cost(t)) {
+                /* new sporadic release */
+                TRACE_TASK(t, "new sporadic release\n");
+                wm_release_at(t, wm_earliest_release(t, now));
+                sched_trace_task_release(t);
+        } else if (is_sliced_task(t)) {
+                /* figure out which slice we should be executing on */
+                fast_forward_slices(t, sleep_time);
+                /* can't be exhausted now */
+                BUG_ON(wm_budget_exhausted(t));
+        } else {
+                /* simply add to the execution time */
+                tsk_rt(t)->job_params.exec_time += sleep_time;
+        }
+
+
+        /* Only add to ready queue if it is not the currently-scheduled
+         * task. This could be the case if a task was woken up concurrently
+         * on a remote CPU before the executing CPU got around to actually
+         * de-scheduling the task, i.e., wake_up() raced with schedule()
+         * and won.
+         */
+        if (dom->scheduled != t) {
+                if (get_partition(t) == dom->cpu)
+                        wm_requeue_local(t, edf);
+                else
+                        /* postpone migration until after unlocking */
+                        migrate = 1;
+        }
+
+        raw_spin_unlock_irqrestore(&dom->slock, flags);
+
+        if (migrate) {
+                TRACE_TASK(t, "wake_up-initiated migration to %d\n",
+                           get_partition(t));
+                wm_requeue_remote(t);
+        }
+
+        TRACE_TASK(t, "wake up done\n");
+}
+
+static void wm_task_block(struct task_struct *t)
+{
+        wm_domain_t* dom = domain_of_task(t);
+        unsigned long flags;
+        lt_t now = litmus_clock();
+
+        TRACE_TASK(t, "block at %llu, state=%d\n", now, t->state);
+
+        tsk_rt(t)->semi_part.wm.suspend_time = now;
+
+        raw_spin_lock_irqsave(&dom->slock, flags);
+        if (is_queued(t)) {
+                TRACE_TASK(t, "still queued; migration invariant failed?\n");
+                remove(&dom->domain, t);
+        }
+        raw_spin_unlock_irqrestore(&dom->slock, flags);
+
+        BUG_ON(!is_realtime(t));
+}
+
+static void wm_task_exit(struct task_struct * t)
+{
+        unsigned long flags;
+        wm_domain_t* dom = domain_of_task(t);
+        rt_domain_t* edf = &dom->domain;
+
+        raw_spin_lock_irqsave(&dom->slock, flags);
+        if (is_queued(t)) {
+                /* dequeue */
+                remove(edf, t);
+        }
+        if (dom->scheduled == t)
+                dom->scheduled = NULL;
+
+        TRACE_TASK(t, "RIP, now reschedule\n");
+
+        preempt(dom);
+        raw_spin_unlock_irqrestore(&dom->slock, flags);
+}
+
+static long wm_check_params(struct task_struct *t)
+{
+        struct rt_param* p = tsk_rt(t);
+        struct edf_wm_params* wm = &p->task_params.semi_part.wm;
+        int i;
+        lt_t tmp;
+
+        if (!is_sliced_task(t)) {
+                /* regular task; nothing to check */
+                TRACE_TASK(t, "accepted regular (non-sliced) task with "
+                           "%d slices\n",
+                           wm->count);
+                return 0;
+        }
+
+        /* (1) Either not sliced, or more than 1 slice. */
+        if (wm->count == 1 || wm->count > MAX_EDF_WM_SLICES) {
+                TRACE_TASK(t, "bad number of slices (%u)\n",
+                           wm->count);
+                return -EINVAL;
+        }
+
+        /* (2) The partition has to agree with the first slice. */
+        if (get_partition(t) != wm->slices[0].cpu) {
+                TRACE_TASK(t, "partition and first slice CPU differ "
+                           "(%d != %d)\n", get_partition(t), wm->slices[0].cpu);
+                return -EINVAL;
+        }
+
+        /* (3) The total budget must agree. */
+        for (i = 0, tmp = 0; i < wm->count; i++)
+                tmp += wm->slices[i].budget;
+        if (get_exec_cost(t) != tmp) {
+                TRACE_TASK(t, "total budget and sum of slice budgets differ\n");
+                return -EINVAL;
+        }
+
+        /* (4) The release of each slice must not precede the previous
+         * deadline. */
+        for (i = 0; i < wm->count - 1; i++)
+                if (wm->slices[i].deadline > wm->slices[i + 1].offset) {
+                        TRACE_TASK(t, "slice %d overlaps with slice %d\n",
+                                   i, i + 1);
+                        return -EINVAL;
+                }
+
+        /* (5) The budget of each slice must fit within [offset, deadline]. */
+        for (i = 0; i < wm->count; i++)
+                if (lt_before(wm->slices[i].deadline, wm->slices[i].offset) ||
+                    wm->slices[i].deadline - wm->slices[i].offset <
+                    wm->slices[i].budget) {
+                        TRACE_TASK(t, "slice %d is overloaded\n", i);
+                        return -EINVAL;
+                }
+
+        /* (6) The budget of each slice must exceed the minimum budget size. */
+        for (i = 0; i < wm->count; i++)
+                if (wm->slices[i].budget < MIN_EDF_WM_SLICE_SIZE) {
+                        TRACE_TASK(t, "slice %d is too short\n", i);
+                        return -EINVAL;
+                }
+
+        /* (7) The CPU of each slice must be different from the previous CPU. */
+        for (i = 0; i < wm->count - 1; i++)
+                if (wm->slices[i].cpu == wm->slices[i + 1].cpu) {
+                        TRACE_TASK(t, "slice %d does not migrate\n", i);
+                        return -EINVAL;
+                }
+
+        /* (8) The CPU of each slice must be online. */
+        for (i = 0; i < wm->count; i++)
+                if (!cpu_online(wm->slices[i].cpu)) {
+                        TRACE_TASK(t, "slice %d is allocated on offline CPU\n",
+                                   i);
+                        return -EINVAL;
+                }
+
+        /* (9) A sliced task's budget must be precisely enforced. */
+        if (!budget_precisely_enforced(t)) {
+                TRACE_TASK(t, "budget is not precisely enforced "
+                           "(policy: %d).\n",
+                           tsk_rt(t)->task_params.budget_policy);
+                return -EINVAL;
+        }
+
+        TRACE_TASK(t, "accepted sliced task with %d slices\n",
+                   wm->count);
+
+        return 0;
+}
+
+static long wm_admit_task(struct task_struct* t)
+{
+        return task_cpu(t) == get_partition(t) ? wm_check_params(t) : -EINVAL;
+}
+
+/* Plugin object */
+static struct sched_plugin edf_wm_plugin __cacheline_aligned_in_smp = {
+        .plugin_name    = "EDF-WM",
+        .tick           = wm_tick,
+        .task_new       = wm_task_new,
+        .complete_job   = complete_job,
+        .task_exit      = wm_task_exit,
+        .schedule       = wm_schedule,
+        .release_at     = wm_release_at,
+        .task_wake_up   = wm_task_wake_up,
+        .task_block     = wm_task_block,
+        .admit_task     = wm_admit_task
+};
+
+
+static int __init init_edf_wm(void)
+{
+        int i;
+
+        /* FIXME: breaks with CPU hotplug
+         */
+        for (i = 0; i < num_online_cpus(); i++) {
+                wm_domain_init(remote_domain(i),
+                               wm_check_resched,
+                               NULL, i);
+        }
+        return register_sched_plugin(&edf_wm_plugin);
+}
+
+module_init(init_edf_wm);
+
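
To make the suspension accounting in wm_task_wake_up() and
fast_forward_slices() concrete: sleep time is charged as if the task
had executed, draining the remaining slice budgets in order until the
remainder falls inside a slice. A detached userspace model of that
loop (an illustrative sketch, not kernel code; the slice layout is
reduced to an array of budgets):

    #include <stdint.h>
    #include <stdio.h>

    typedef uint64_t lt_t; /* nanoseconds, as in LITMUS^RT */

    /* Charge time_passed against the slices, starting at slice idx with
     * exec_in_slice already consumed; returns the new slice index. */
    static unsigned int fast_forward(const lt_t *budgets, unsigned int count,
                                     unsigned int idx, lt_t *exec_in_slice,
                                     lt_t time_passed)
    {
            while (idx < count - 1) {
                    lt_t remaining = budgets[idx] - *exec_in_slice;
                    if (time_passed < remaining)
                            break;
                    /* slice completely exhausted; advance to the next one */
                    time_passed -= remaining;
                    *exec_in_slice = 0;
                    idx++;
            }
            *exec_in_slice += time_passed;
            return idx;
    }

    int main(void)
    {
            /* Two slices of 2 ms and 1 ms (the commit-message example):
             * a job that executed 1.5 ms in slice 0 and then slept for
             * 1 ms resumes 0.5 ms into slice 1. */
            lt_t budgets[] = { 2000000, 1000000 };
            lt_t exec = 1500000;
            unsigned int idx = fast_forward(budgets, 2, 0, &exec, 1000000);

            printf("slice %u, %llu ns consumed\n",
                   idx, (unsigned long long)exec);
            return 0;
    }

Unlike this simplified model, the in-kernel version assumes the caller
has already ruled out running past the last slice: wm_task_wake_up()
treats that case as a new sporadic release before ever calling
fast_forward_slices().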