author    Jeremy Erickson <jerickso@cs.unc.edu>  2013-04-30 15:26:17 -0400
committer Jeremy Erickson <jerickso@cs.unc.edu>  2013-04-30 15:26:17 -0400
commit    1e13b0322304cd2cb9667265503a388bf080ffe6 (patch)
tree      4d2e24c22c76cc8245da72e079b764ced9f021f7
parent    180eefef9e92ae1aad4883646499bb3bf1cc8399 (diff)
Initial EDF-os clone of EDF-fm
-rw-r--r--  include/litmus/rt_param.h |  17
-rw-r--r--  litmus/Makefile           |   3
-rw-r--r--  litmus/sched_edf_os.c     | 571
3 files changed, 590 insertions(+), 1 deletion(-)
diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
index 6f43deacb3e1..9e85728d4380 100644
--- a/include/litmus/rt_param.h
+++ b/include/litmus/rt_param.h
@@ -60,6 +60,20 @@ struct edffm_params {
 	lt_t fraction[2][NR_CPUS_EDF_FM];
 };
 
+struct edfos_params {
+	/* EDF-os: on which CPUs can a migratory task execute? */
+	unsigned int	cpus[NR_CPUS];
+	/* how many cpus are used by this task? */
+	unsigned int	nr_cpus;
+	/* Fraction of this task's exec_cost that each CPU should handle.
+	 * We keep each fraction as num/denom in a matrix of
+	 * (2 rows) x (NR_CPUS columns):
+	 * the first row holds the numerators,
+	 * the second row holds the denominators.
+	 */
+	lt_t		fraction[2][NR_CPUS];
+};
+
 /* Parameters for NPS-F semi-partitioned scheduling algorithm.
  * Each (cpu, budget) entry defines the share ('budget' in ns, a % of
  * the slot_length) of the notional processor on the CPU 'cpu'.
@@ -114,6 +128,9 @@ struct rt_task {
 	/* EDF-Fm; defined in sched_edf_fm.c */
 	struct edffm_params fm;
 
+	/* EDF-os; defined in sched_edf_os.c */
+	struct edfos_params os;
+
 	/* NPS-F; defined in sched_npsf.c
 	 * id for the server (notional processor) that holds
 	 * this task; the same npfs_id can be assigned to "the same"
diff --git a/litmus/Makefile b/litmus/Makefile
index 5533a58eb684..b243093abc6d 100644
--- a/litmus/Makefile
+++ b/litmus/Makefile
@@ -17,7 +17,8 @@ obj-y = sched_plugin.o litmus.o \
 	    sched_psn_edf.o \
 	    sched_edf_wm.o \
 	    sched_npsf.o \
-	    sched_edf_fm.o
+	    sched_edf_fm.o \
+	    sched_edf_os.o
 
 obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o
 obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o
diff --git a/litmus/sched_edf_os.c b/litmus/sched_edf_os.c
new file mode 100644
index 000000000000..984d0678ee5a
--- /dev/null
+++ b/litmus/sched_edf_os.c
@@ -0,0 +1,571 @@
/*
 * litmus/sched_edf_os.c
 *
 * Implementation of the EDF-os scheduling algorithm.
 */

#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/list.h>
#include <linux/spinlock.h>

#include <linux/module.h>

#include <litmus/litmus.h>
#include <litmus/jobs.h>
#include <litmus/sched_plugin.h>
#include <litmus/edf_common.h>

typedef struct {
        rt_domain_t             domain;
        int                     cpu;
        struct task_struct*     scheduled; /* only RT tasks */
/* domain lock */
#define slock domain.ready_lock
} edfos_domain_t;

DEFINE_PER_CPU(edfos_domain_t, edfos_domains);

#define local_edfos             (&__get_cpu_var(edfos_domains))
#define remote_edf(cpu)         (&per_cpu(edfos_domains, cpu).domain)
#define remote_edfos(cpu)       (&per_cpu(edfos_domains, cpu))
#define task_edf(task)          remote_edf(get_partition(task))
#define task_edfos(task)        remote_edfos(get_partition(task))

#define edfos_params(t)         (t->rt_param.task_params.semi_part.os)

/* Is the task a migratory task? */
#define is_migrat_task(task)    (edfos_params(task).nr_cpus)
/* t is on the wrong CPU (it should be requeued properly) */
#define wrong_cpu(t)            (is_migrat_task((t)) && task_cpu((t)) != get_partition((t)))
/* Get next CPU */
#define migrat_next_cpu(t)      \
        ((tsk_rt(t)->task_params.cpu == edfos_params(t).cpus[0]) ? \
                edfos_params(t).cpus[1] : \
                edfos_params(t).cpus[0])
/* Get current cpu */
#define migrat_cur_cpu(t)       \
        ((tsk_rt(t)->task_params.cpu == edfos_params(t).cpus[0]) ? \
                edfos_params(t).cpus[0] : \
                edfos_params(t).cpus[1])
/* Manipulate share for current cpu */
#define cur_cpu_fract_num(t)    \
        ((tsk_rt(t)->task_params.cpu == edfos_params(t).cpus[0]) ? \
                edfos_params(t).fraction[0][0] : \
                edfos_params(t).fraction[0][1])
#define cur_cpu_fract_den(t)    \
        ((tsk_rt(t)->task_params.cpu == edfos_params(t).cpus[0]) ? \
                edfos_params(t).fraction[1][0] : \
                edfos_params(t).fraction[1][1])
/* Get job number for current cpu */
#define cur_cpu_job_no(t)       \
        ((tsk_rt(t)->task_params.cpu == edfos_params(t).cpus[0]) ? \
                tsk_rt(t)->semi_part.cpu_job_no[0] : \
                tsk_rt(t)->semi_part.cpu_job_no[1])
/* What is the current cpu position in the array? */
#define edfos_cpu_pos(cpu, t)   \
        ((cpu == edfos_params(t).cpus[0]) ? 0 : 1)
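/*
 * Illustration of the share encoding used by the macros above (example
 * values only, not part of the original commit): a migratory task meant to
 * run 2/3 of its jobs on CPU 1 and 1/3 on CPU 2 would be set up with
 *
 *      nr_cpus = 2, cpus[0] = 1, cpus[1] = 2,
 *      fraction[0][0] = 2, fraction[1][0] = 3,    -> 2/3 on cpus[0]
 *      fraction[0][1] = 1, fraction[1][1] = 3,    -> 1/3 on cpus[1]
 *
 * i.e., row 0 of fraction[][] holds numerators and row 1 denominators,
 * indexed by the task's position in cpus[] (cf. edfos_cpu_pos()).
 */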

/*
 * EDF-os: migratory tasks have statically higher priority than fixed tasks;
 * within each class, jobs are ordered by EDF.
 * (Both first and second may be NULL.)
 */
int edfos_higher_prio(struct task_struct* first, struct task_struct* second)
{
        if ((first && edfos_params(first).nr_cpus) ||
            (second && edfos_params(second).nr_cpus)) {
                if ((first && edfos_params(first).nr_cpus) &&
                    (second && edfos_params(second).nr_cpus))
                        /* both are migrating */
                        return edf_higher_prio(first, second);

                if (first && edfos_params(first).nr_cpus)
                        /* first is migrating */
                        return 1;
                else
                        /* second is migrating */
                        return 0;
        }

        /* both are fixed or not real time */
        return edf_higher_prio(first, second);
}
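/*
 * Example (illustrative, not from the original commit): a migratory job
 * with deadline 50 is preferred over a fixed job with deadline 10, because
 * class membership is checked first; only when both jobs are migratory, or
 * both are fixed (or not real-time), does edf_higher_prio() order them by
 * deadline.
 */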

int edfos_ready_order(struct bheap_node* a, struct bheap_node* b)
{
        return edfos_higher_prio(bheap2task(a), bheap2task(b));
}

/* edfos_preemption_needed - check whether the task t needs to be preempted.
 * Call only with irqs disabled and with the ready_lock acquired.
 */
int edfos_preemption_needed(rt_domain_t* rt, struct task_struct *t)
{
        /* we need the read lock for edf_ready_queue */
        /* no need to preempt if there is nothing pending */
        if (!__jobs_pending(rt))
                return 0;
        /* we need to reschedule if t doesn't exist */
        if (!t)
                return 1;

        /* make sure to get non-rt stuff out of the way */
        return !is_realtime(t) || edfos_higher_prio(__next_ready(rt), t);
}

/* we assume the lock is being held */
static void preempt(edfos_domain_t *edfos)
{
        preempt_if_preemptable(edfos->scheduled, edfos->cpu);
}

static void edfos_release_jobs(rt_domain_t* rt, struct bheap* tasks)
{
        unsigned long flags;
        edfos_domain_t *edfos = container_of(rt, edfos_domain_t, domain);

        raw_spin_lock_irqsave(&edfos->slock, flags);

        __merge_ready(rt, tasks);

        if (edfos_preemption_needed(rt, edfos->scheduled))
                preempt(edfos);

        raw_spin_unlock_irqrestore(&edfos->slock, flags);
}

/* EDF-os uses the "release_master" field to force the next release for
 * the task 'task' to happen on a remote CPU. The remote cpu for task is
 * previously set up during job_completion() taking into consideration
 * whether a task is a migratory task or not.
 */
static inline void
edfos_add_release_remote(struct task_struct *task)
{
        unsigned long flags;
        rt_domain_t *rt = task_edf(task);

        raw_spin_lock_irqsave(&rt->tobe_lock, flags);

        /* "modify" destination cpu */
        rt->release_master = get_partition(task);

        TRACE_TASK(task, "Add remote release: smp_proc_id = %d, cpu = %d, remote = %d\n",
                   smp_processor_id(), task_cpu(task), rt->release_master);

        /* trigger future release */
        __add_release(rt, task);

        /* reset proper release_master and unlock */
        rt->release_master = NO_CPU;
        raw_spin_unlock_irqrestore(&rt->tobe_lock, flags);
}

/* Acquire both ready-queue locks in a fixed (ascending address) order so
 * that two CPUs migrating tasks towards each other cannot deadlock.
 * This is called with interrupts disabled and rq->lock held (from
 * schedule()).
 */
static noinline void double_domain_lock(edfos_domain_t *dom1, edfos_domain_t *dom2)
{
        if (dom1 == dom2) {
                /* fake */
                raw_spin_lock(&dom1->slock);
        } else {
                if (dom1 < dom2) {
                        raw_spin_lock(&dom1->slock);
                        raw_spin_lock(&dom2->slock);
                        TRACE("acquired %d and %d\n", dom1->cpu, dom2->cpu);
                } else {
                        raw_spin_lock(&dom2->slock);
                        raw_spin_lock(&dom1->slock);
                        TRACE("acquired %d and %d\n", dom2->cpu, dom1->cpu);
                }
        }
}
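/*
 * Concretely: if CPU 0 pushes a task towards CPU 1 while CPU 1 pushes one
 * towards CPU 0, both callers acquire the lower-addressed domain lock
 * first, so one of them waits and an ABBA deadlock cannot arise.
 */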

/* Directly insert a task in a remote ready queue. This function
 * should only be called if this task is a migrating task and its
 * last job for this CPU just completed (a new one is released for
 * a remote CPU), but the new job is already tardy.
 */
static noinline void insert_task_in_remote_ready(struct task_struct *task)
{
        edfos_domain_t *this = remote_edfos(task_cpu(task));
        edfos_domain_t *remote = remote_edfos(get_partition(task));

        BUG_ON(get_partition(task) != remote->cpu);

        TRACE_TASK(task, "Migrate From P%d -> To P%d\n",
                   this->cpu, remote->cpu);
        TRACE_TASK(task, "Inserting in remote ready queue\n");

        WARN_ON(!irqs_disabled());

        raw_spin_unlock(&this->slock);
        mb();
        TRACE_TASK(task, "edfos_lock %d released\n", this->cpu);

        /* lock both ready queues */
        double_domain_lock(this, remote);
        mb();

        __add_ready(&remote->domain, task);

        /* release remote but keep ours */
        raw_spin_unlock(&remote->slock);
        TRACE_TASK(task, "edfos_lock %d released\n", remote->cpu);

        /* ask remote cpu to reschedule, we are already rescheduling on this */
        preempt(remote);
}

static void requeue(struct task_struct* t, rt_domain_t *edf)
{
        if (t->state != TASK_RUNNING)
                TRACE_TASK(t, "requeue: !TASK_RUNNING\n");

        set_rt_flags(t, RT_F_RUNNING);
        if (is_released(t, litmus_clock())) {
                if (wrong_cpu(t)) {
                        /* this should only happen if t just completed, but
                         * its next release is already tardy, so it should be
                         * migrated and inserted in the remote ready queue
                         */
                        TRACE_TASK(t, "Migrating task already released, "
                                   "move from P%d to P%d\n",
                                   task_cpu(t), get_partition(t));

                        insert_task_in_remote_ready(t);
                } else {
                        /* not a migrating task, or the job is on the right CPU */
                        __add_ready(edf, t);
                }
        } else {
                if (wrong_cpu(t)) {
                        TRACE_TASK(t, "Migrating task, adding remote release\n");
                        edfos_add_release_remote(t);
                } else {
                        TRACE_TASK(t, "Adding local release\n");
                        add_release(edf, t);
                }
        }
}

/* Update statistics for the _current_ job.
 * - job_no was incremented _before_ starting this job
 *   (release_at / prepare_for_next_period)
 * - cpu_job_no is incremented when the job completes
 */
static void update_job_counter(struct task_struct *t)
{
        int cpu_pos;

        /* Which CPU counter should be incremented? */
        cpu_pos = edfos_cpu_pos(t->rt_param.task_params.cpu, t);
        t->rt_param.semi_part.cpu_job_no[cpu_pos]++;

        TRACE_TASK(t, "job_no = %d, cpu_job_no(pos %d) = %d, cpu %d\n",
                   t->rt_param.job_params.job_no, cpu_pos, cur_cpu_job_no(t),
                   t->rt_param.task_params.cpu);
}

/* What is the next cpu for this job? (eq. 8 in the EDF-fm paper) */
static int next_cpu_for_job(struct task_struct *t)
{
        BUG_ON(!is_migrat_task(t));

        TRACE_TASK(t, "%u = %u * %u / %u\n",
                   t->rt_param.job_params.job_no, cur_cpu_job_no(t),
                   cur_cpu_fract_den(t), cur_cpu_fract_num(t));
        if ((t->rt_param.job_params.job_no) ==
            (((lt_t) cur_cpu_job_no(t) * cur_cpu_fract_den(t)) /
             cur_cpu_fract_num(t)))
                return edfos_params(t).cpus[0];

        return edfos_params(t).cpus[1];
}
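/*
 * Worked example for the check above (illustrative numbers, not from the
 * original commit): assume the task currently runs on cpus[0] with a 2/3
 * share there (cur_cpu_fract_num = 2, cur_cpu_fract_den = 3). With
 * cur_cpu_job_no = 2 completed jobs on that CPU and job_no = 3, the test
 * computes (2 * 3) / 2 = 3 == job_no, so the next job stays on cpus[0];
 * had job_no been 4, the mismatch would send it to cpus[1]. The integer
 * division provides the floor of eq. 8.
 */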

/* If needed (the share for task t on this CPU is exhausted), updates
 * the task_params.cpu for the _migrating_ task t
 */
static void change_migrat_cpu_if_needed(struct task_struct *t)
{
        BUG_ON(!is_migrat_task(t));
        /* EDF-os: if it is a migrating task and it has already executed
         * the required number of jobs on this CPU, we need to move it
         * to its next CPU; changing the cpu here will affect the requeue
         * and the next release
         */
        if (unlikely(next_cpu_for_job(t) != migrat_cur_cpu(t))) {
                tsk_rt(t)->task_params.cpu = migrat_next_cpu(t);
                TRACE_TASK(t, "EDF-os: will migrate job %d -> %d\n",
                           task_cpu(t), tsk_rt(t)->task_params.cpu);
                return;
        }

        TRACE_TASK(t, "EDF-os: job will stay on %d -> %d\n",
                   task_cpu(t), tsk_rt(t)->task_params.cpu);
}
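/*
 * Once task_params.cpu has been updated above, get_partition() reports the
 * new CPU and wrong_cpu() becomes true for this task, so the following
 * requeue() either inserts the job directly into the remote ready queue
 * (if it is already released) or programs its next release on the remote
 * CPU via edfos_add_release_remote().
 */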

static void job_completion(struct task_struct* t, int forced)
{
        sched_trace_task_completion(t, forced);
        TRACE_TASK(t, "job_completion().\n");

        if (unlikely(is_migrat_task(t))) {
                update_job_counter(t);
                change_migrat_cpu_if_needed(t);
        }

        set_rt_flags(t, RT_F_SLEEP);
        prepare_for_next_period(t);
}

static void edfos_tick(struct task_struct *t)
{
        edfos_domain_t *edfos = local_edfos;

        BUG_ON(is_realtime(t) && t != edfos->scheduled);

        if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) {
                set_tsk_need_resched(t);
                TRACE("edfos_scheduler_tick: "
                      "%d is preemptable "
                      "=> FORCE_RESCHED\n", t->pid);
        }
}

static struct task_struct* edfos_schedule(struct task_struct * prev)
{
        edfos_domain_t* edfos = local_edfos;
        rt_domain_t* edf = &edfos->domain;
        struct task_struct* next;

        int out_of_time, sleep, preempt, exists, blocks, change_cpu, resched;

        raw_spin_lock(&edfos->slock);

        BUG_ON(edfos->scheduled && edfos->scheduled != prev);
        BUG_ON(edfos->scheduled && !is_realtime(prev));

        /* (0) Determine state */
        exists = edfos->scheduled != NULL;
        blocks = exists && !is_running(edfos->scheduled);
        out_of_time = exists &&
                budget_enforced(edfos->scheduled) &&
                budget_exhausted(edfos->scheduled);
        sleep = exists && get_rt_flags(edfos->scheduled) == RT_F_SLEEP;
        change_cpu = exists && wrong_cpu(edfos->scheduled);
        preempt = edfos_preemption_needed(edf, prev);

        BUG_ON(blocks && change_cpu);

        if (exists)
                TRACE_TASK(prev,
                           "blocks:%d out_of_time:%d sleep:%d preempt:%d "
                           "wrong_cpu:%d state:%d sig:%d\n",
                           blocks, out_of_time, sleep, preempt,
                           change_cpu, prev->state, signal_pending(prev));

        /* If we need to preempt, do so. */
        resched = preempt;

        /* If a task blocks we have no choice but to reschedule. */
        if (blocks)
                resched = 1;

        /* If the task has just woken up, it was tardy and the wake-up raced
         * with this schedule(): a new job has already been released, but the
         * currently scheduled task must be enqueued on a remote ready queue,
         * and a new task should be selected for the local queue.
         */
        if (change_cpu)
                resched = 1;

        /* Any task that is preemptable and either exhausts its execution
         * budget or wants to sleep completes. We may have to reschedule after
         * this.
         */
        if ((out_of_time || sleep) && !blocks) {
                job_completion(edfos->scheduled, !sleep);
                resched = 1;
        }

        /* The final scheduling decision. Do we need to switch for some reason?
         * Switch if we are in RT mode and have no task or if we need to
         * resched.
         */
        next = NULL;
        if (resched || !exists) {
                if (edfos->scheduled && !blocks)
                        requeue(edfos->scheduled, edf);
                next = __take_ready(edf);
        } else
                /* Only override Linux scheduler if we have a real-time task
                 * scheduled that needs to continue.
                 */
                if (exists)
                        next = prev;

        if (next) {
                TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
                set_rt_flags(next, RT_F_RUNNING);
        } else {
                TRACE("becoming idle at %llu\n", litmus_clock());
        }

        edfos->scheduled = next;
        raw_spin_unlock(&edfos->slock);

        return next;
}
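/*
 * Summary: edfos_schedule() hands the CPU to a new task whenever the
 * previous one blocked, completed (budget exhausted or voluntary sleep),
 * must move to another partition (change_cpu), or a higher-priority job is
 * pending (preempt); otherwise a still-running real-time task keeps the CPU.
 */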

/* Prepare a task for running in RT mode
 */
static void edfos_task_new(struct task_struct * t, int on_rq, int running)
{
        rt_domain_t* edf = task_edf(t);
        edfos_domain_t* edfos = task_edfos(t);
        unsigned long flags;

        TRACE_TASK(t, "EDF-os: task new, cpu = %d\n",
                   t->rt_param.task_params.cpu);

        release_at(t, litmus_clock());
        update_job_counter(t);

        /* The task should be running in the queue, otherwise signal
         * code will try to wake it up with fatal consequences.
         */
        raw_spin_lock_irqsave(&edfos->slock, flags);
        if (running) {
                /* there shouldn't be anything else running at the time */
                BUG_ON(edfos->scheduled);
                edfos->scheduled = t;
        } else {
                requeue(t, edf);
                /* maybe we have to reschedule */
                preempt(edfos);
        }
        raw_spin_unlock_irqrestore(&edfos->slock, flags);
}

static void edfos_task_wake_up(struct task_struct *task)
{
        unsigned long flags;
        edfos_domain_t* edfos = task_edfos(task);
        rt_domain_t* edf = task_edf(task);
        lt_t now;

        TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());

        TRACE_TASK(task, "acquire edfos %d\n", edfos->cpu);
        raw_spin_lock_irqsave(&edfos->slock, flags);

        BUG_ON(edfos != task_edfos(task));
        BUG_ON(is_queued(task));

        now = litmus_clock();
        if (is_tardy(task, now)) {
                if (unlikely(is_migrat_task(task))) {
                        /* a new job will be released.
                         * Update current job counter */
                        update_job_counter(task);
                        /* Switch CPU if needed */
                        change_migrat_cpu_if_needed(task);
                }
                /* new sporadic release */
                TRACE_TASK(task, "release new\n");
                release_at(task, now);
                sched_trace_task_release(task);
        }

        /* Only add to ready queue if it is not the currently-scheduled
         * task. This could be the case if a task was woken up concurrently
         * on a remote CPU before the executing CPU got around to actually
         * de-scheduling the task, i.e., wake_up() raced with schedule()
         * and won.
         */
        if (edfos->scheduled != task)
                requeue(task, edf);

        raw_spin_unlock_irqrestore(&edfos->slock, flags);
        TRACE_TASK(task, "release edfos %d\n", edfos->cpu);
        TRACE_TASK(task, "wake up done\n");
}

static void edfos_task_block(struct task_struct *t)
{
        TRACE_TASK(t, "block at %llu, state=%d\n", litmus_clock(), t->state);

        BUG_ON(!is_realtime(t));
        if (is_queued(t)) {
                edfos_domain_t *edfos = local_edfos;
                TRACE_TASK(t, "task blocked, race with wakeup, "
                           "remove from queue %d\n", edfos->cpu);
                remove(&edfos->domain, t);
        }
}

static void edfos_task_exit(struct task_struct * t)
{
        unsigned long flags;
        edfos_domain_t* edfos = task_edfos(t);
        rt_domain_t* edf;

        raw_spin_lock_irqsave(&edfos->slock, flags);
        if (is_queued(t)) {
                /* dequeue */
                edf = task_edf(t);
                remove(edf, t);
        }
        if (edfos->scheduled == t)
                edfos->scheduled = NULL;

        TRACE_TASK(t, "RIP\n");

        preempt(edfos);
        raw_spin_unlock_irqrestore(&edfos->slock, flags);
}

static long edfos_admit_task(struct task_struct* tsk)
{
        return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL;
}

/* Plugin object */
static struct sched_plugin edfos_plugin __cacheline_aligned_in_smp = {
        .plugin_name            = "EDF-os",
        .tick                   = edfos_tick,
        .task_new               = edfos_task_new,
        .complete_job           = complete_job,
        .task_exit              = edfos_task_exit,
        .schedule               = edfos_schedule,
        .task_wake_up           = edfos_task_wake_up,
        .task_block             = edfos_task_block,
        .admit_task             = edfos_admit_task
};

static int __init init_edfos(void)
{
        int i;
        edfos_domain_t *edfos;

        /* Note, broken if num_online_cpus() may change */
        for (i = 0; i < num_online_cpus(); i++) {
                edfos = remote_edfos(i);
                edfos->cpu = i;
                edfos->scheduled = NULL;
                rt_domain_init(&edfos->domain, edfos_ready_order, NULL,
                               edfos_release_jobs);
        }

        return register_sched_plugin(&edfos_plugin);
}

module_init(init_edfos);