aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBjoern Brandenburg <bbb@mpi-sws.org>2013-02-12 13:18:11 -0500
committerBjoern Brandenburg <bbb@mpi-sws.org>2014-06-07 05:31:11 -0400
commitf448dad0834990e454e7632afe0ce4227cc71f09 (patch)
treef803c9ac2def4c62276a4721cb6662fea453ec81
parent23f3fc1efa2a1fa076372331de12df3998298ab3 (diff)
Add C-EDF scheduler plugin
-rw-r--r--litmus/Kconfig10
-rw-r--r--litmus/Makefile2
-rw-r--r--litmus/sched_cedf.c903
3 files changed, 914 insertions, 1 deletions
diff --git a/litmus/Kconfig b/litmus/Kconfig
index fdf31f3dd6c2..38d9e433b345 100644
--- a/litmus/Kconfig
+++ b/litmus/Kconfig
@@ -2,6 +2,16 @@ menu "LITMUS^RT"
2 2
3menu "Scheduling" 3menu "Scheduling"
4 4
5config PLUGIN_CEDF
6 bool "Clustered-EDF"
7 depends on X86 && SYSFS
8 default y
9 help
10 Include the Clustered EDF (C-EDF) plugin in the kernel.
11 This is appropriate for large platforms with shared caches.
12 On smaller platforms (e.g., ARM PB11MPCore), using C-EDF
13 makes little sense since there aren't any shared caches.
14
5config RELEASE_MASTER 15config RELEASE_MASTER
6 bool "Release-master Support" 16 bool "Release-master Support"
7 depends on ARCH_HAS_SEND_PULL_TIMERS && SMP 17 depends on ARCH_HAS_SEND_PULL_TIMERS && SMP
diff --git a/litmus/Makefile b/litmus/Makefile
index 2d2e0a584d04..8110a5ae1589 100644
--- a/litmus/Makefile
+++ b/litmus/Makefile
@@ -23,7 +23,7 @@ obj-y = sched_plugin.o litmus.o \
23 sched_psn_edf.o \ 23 sched_psn_edf.o \
24 sched_pfp.o 24 sched_pfp.o
25 25
26 26obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o
27 27
28obj-$(CONFIG_SCHED_CPU_AFFINITY) += affinity.o 28obj-$(CONFIG_SCHED_CPU_AFFINITY) += affinity.o
29 29
diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c
new file mode 100644
index 000000000000..b1454c032445
--- /dev/null
+++ b/litmus/sched_cedf.c
@@ -0,0 +1,903 @@
1/*
2 * litmus/sched_cedf.c
3 *
4 * Implementation of the C-EDF scheduling algorithm.
5 *
6 * This implementation is based on G-EDF:
7 * - CPUs are clustered around L2 or L3 caches.
8 * - Cluster topology is automatically detected (this is arch dependent
9 * and works only on x86 at the moment --- and only with modern
10 * cpus that export cpuid4 information)
11 * - The plugin _does not_ attempt to put tasks in the right cluster, i.e.,
12 * the programmer needs to be aware of the topology to place tasks
13 * in the desired cluster
14 * - default clustering is ALL (GLOBAL_CLUSTER: all online_cpus are placed
15 * in a single cluster); supported clusters are: L1 (private cache: pedf),
16 * L2, L3, ALL.
17 *
18 * For details on functions, take a look at sched_gsn_edf.c
19 *
20 * Currently, we do not support changes in the number of online cpus.
21 * If the num_online_cpus() dynamically changes, the plugin is broken.
22 *
23 * This version uses the simple approach and serializes all scheduling
24 * decisions by the use of a queue lock. This is probably not the
25 * best way to do it, but it should suffice for now.
26 */
27
28#include <linux/spinlock.h>
29#include <linux/percpu.h>
30#include <linux/sched.h>
31#include <linux/slab.h>
32
33#include <linux/module.h>
34
35#include <litmus/litmus.h>
36#include <litmus/jobs.h>
37#include <litmus/preempt.h>
38#include <litmus/budget.h>
39#include <litmus/sched_plugin.h>
40#include <litmus/edf_common.h>
41#include <litmus/sched_trace.h>
42
43#include <litmus/clustered.h>
44
45#include <litmus/bheap.h>
46
47#ifdef CONFIG_SCHED_CPU_AFFINITY
48#include <litmus/affinity.h>
49#endif
50
51/* to configure the cluster size */
52#include <litmus/litmus_proc.h>
53#include <linux/uaccess.h>
54
55/* Reference configuration variable. Determines which cache level is used to
56 * group CPUs into clusters. GLOBAL_CLUSTER, which is the default, means that
57 * all CPUs form a single cluster (just like GSN-EDF).
58 */
59static enum cache_level cluster_config = GLOBAL_CLUSTER;
60
61struct clusterdomain;
62
63/* cpu_entry_t - maintain the linked and scheduled state
64 *
65 * A cpu also contains a pointer to the cedf_domain_t cluster
66 * that owns it (struct clusterdomain*)
67 */
68typedef struct {
69 int cpu;
70 struct clusterdomain* cluster; /* owning cluster */
71 struct task_struct* linked; /* only RT tasks */
72 struct task_struct* scheduled; /* only RT tasks */
73 atomic_t will_schedule; /* prevent unneeded IPIs */
74 struct bheap_node* hn;
75} cpu_entry_t;
76
/* exactly one cpu_entry_t exists for every CPU */
DEFINE_PER_CPU(cpu_entry_t, cedf_cpu_entries);

/* Accessors for the will_schedule flag: set/clear operate on the entry of
 * the local CPU, test reads the entry of the CPU passed in.
 */
#define set_will_schedule() \
	(atomic_set(&__get_cpu_var(cedf_cpu_entries).will_schedule, 1))
#define clear_will_schedule() \
	(atomic_set(&__get_cpu_var(cedf_cpu_entries).will_schedule, 0))
#define test_will_schedule(cpu) \
	(atomic_read(&per_cpu(cedf_cpu_entries, cpu).will_schedule))
86
87/*
88 * In C-EDF there is a cedf domain _per_ cluster
89 * The number of clusters is dynamically determined accordingly to the
90 * total cpu number and the cluster size
91 */
92typedef struct clusterdomain {
93 /* rt_domain for this cluster */
94 rt_domain_t domain;
95 /* cpus in this cluster */
96 cpu_entry_t* *cpus;
97 /* map of this cluster cpus */
98 cpumask_var_t cpu_map;
99 /* the cpus queue themselves according to priority in here */
100 struct bheap_node *heap_node;
101 struct bheap cpu_heap;
102 /* lock for this cluster */
103#define cluster_lock domain.ready_lock
104} cedf_domain_t;
105
106/* a cedf_domain per cluster; allocation is done at init/activation time */
107cedf_domain_t *cedf;
108
109#define remote_cluster(cpu) ((cedf_domain_t *) per_cpu(cedf_cpu_entries, cpu).cluster)
110#define task_cpu_cluster(task) remote_cluster(get_partition(task))
111
/* Compile-time debugging switches: with WANT_ALL_SCHED_EVENTS defined, all
 * scheduling decisions show up in the TRACE() log; VERBOSE_INIT produces
 * verbose information while the plugin is initialized (e.g., topology).
 * Move the following line out of this comment to enable the former:
#define WANT_ALL_SCHED_EVENTS
 */
#define VERBOSE_INIT
118
119static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b)
120{
121 cpu_entry_t *a, *b;
122 a = _a->value;
123 b = _b->value;
124 /* Note that a and b are inverted: we want the lowest-priority CPU at
125 * the top of the heap.
126 */
127 return edf_higher_prio(b->linked, a->linked);
128}
129
130/* update_cpu_position - Move the cpu entry to the correct place to maintain
131 * order in the cpu queue. Caller must hold cedf lock.
132 */
133static void update_cpu_position(cpu_entry_t *entry)
134{
135 cedf_domain_t *cluster = entry->cluster;
136
137 if (likely(bheap_node_in_heap(entry->hn)))
138 bheap_delete(cpu_lower_prio,
139 &cluster->cpu_heap,
140 entry->hn);
141
142 bheap_insert(cpu_lower_prio, &cluster->cpu_heap, entry->hn);
143}
144
145/* caller must hold cedf lock */
146static cpu_entry_t* lowest_prio_cpu(cedf_domain_t *cluster)
147{
148 struct bheap_node* hn;
149 hn = bheap_peek(cpu_lower_prio, &cluster->cpu_heap);
150 return hn->value;
151}
152
153
154/* link_task_to_cpu - Update the link of a CPU.
155 * Handles the case where the to-be-linked task is already
156 * scheduled on a different CPU.
157 */
158static noinline void link_task_to_cpu(struct task_struct* linked,
159 cpu_entry_t *entry)
160{
161 cpu_entry_t *sched;
162 struct task_struct* tmp;
163 int on_cpu;
164
165 BUG_ON(linked && !is_realtime(linked));
166
167 /* Currently linked task is set to be unlinked. */
168 if (entry->linked) {
169 entry->linked->rt_param.linked_on = NO_CPU;
170 }
171
172 /* Link new task to CPU. */
173 if (linked) {
174 /* handle task is already scheduled somewhere! */
175 on_cpu = linked->rt_param.scheduled_on;
176 if (on_cpu != NO_CPU) {
177 sched = &per_cpu(cedf_cpu_entries, on_cpu);
178 /* this should only happen if not linked already */
179 BUG_ON(sched->linked == linked);
180
181 /* If we are already scheduled on the CPU to which we
182 * wanted to link, we don't need to do the swap --
183 * we just link ourselves to the CPU and depend on
184 * the caller to get things right.
185 */
186 if (entry != sched) {
187 TRACE_TASK(linked,
188 "already scheduled on %d, updating link.\n",
189 sched->cpu);
190 tmp = sched->linked;
191 linked->rt_param.linked_on = sched->cpu;
192 sched->linked = linked;
193 update_cpu_position(sched);
194 linked = tmp;
195 }
196 }
197 if (linked) /* might be NULL due to swap */
198 linked->rt_param.linked_on = entry->cpu;
199 }
200 entry->linked = linked;
201#ifdef WANT_ALL_SCHED_EVENTS
202 if (linked)
203 TRACE_TASK(linked, "linked to %d.\n", entry->cpu);
204 else
205 TRACE("NULL linked to %d.\n", entry->cpu);
206#endif
207 update_cpu_position(entry);
208}
209
210/* unlink - Make sure a task is not linked any longer to an entry
211 * where it was linked before. Must hold cedf_lock.
212 */
213static noinline void unlink(struct task_struct* t)
214{
215 cpu_entry_t *entry;
216
217 if (t->rt_param.linked_on != NO_CPU) {
218 /* unlink */
219 entry = &per_cpu(cedf_cpu_entries, t->rt_param.linked_on);
220 t->rt_param.linked_on = NO_CPU;
221 link_task_to_cpu(NULL, entry);
222 } else if (is_queued(t)) {
223 /* This is an interesting situation: t is scheduled,
224 * but was just recently unlinked. It cannot be
225 * linked anywhere else (because then it would have
226 * been relinked to this CPU), thus it must be in some
227 * queue. We must remove it from the list in this
228 * case.
229 *
230 * in C-EDF case is should be somewhere in the queue for
231 * its domain, therefore and we can get the domain using
232 * task_cpu_cluster
233 */
234 remove(&(task_cpu_cluster(t))->domain, t);
235 }
236}
237
238
239/* preempt - force a CPU to reschedule
240 */
241static void preempt(cpu_entry_t *entry)
242{
243 preempt_if_preemptable(entry->scheduled, entry->cpu);
244}
245
246/* requeue - Put an unlinked task into gsn-edf domain.
247 * Caller must hold cedf_lock.
248 */
249static noinline void requeue(struct task_struct* task)
250{
251 cedf_domain_t *cluster = task_cpu_cluster(task);
252 BUG_ON(!task);
253 /* sanity check before insertion */
254 BUG_ON(is_queued(task));
255
256 if (is_early_releasing(task) || is_released(task, litmus_clock()))
257 __add_ready(&cluster->domain, task);
258 else {
259 /* it has got to wait */
260 add_release(&cluster->domain, task);
261 }
262}
263
#ifdef CONFIG_SCHED_CPU_AFFINITY
/* cedf_get_nearest_available_cpu - look up an available CPU near @start via
 * get_nearest_available_cpu() and accept it only if it belongs to @cluster.
 *
 * Returns the cpu entry found, or NULL if there is none inside the cluster.
 */
static cpu_entry_t* cedf_get_nearest_available_cpu(
				cedf_domain_t *cluster, cpu_entry_t *start)
{
	cpu_entry_t *candidate;

	get_nearest_available_cpu(candidate, start, cedf_cpu_entries,
#ifdef CONFIG_RELEASE_MASTER
				  cluster->domain.release_master
#else
				  NO_CPU
#endif
				  );

	/* a CPU outside of our cluster is of no use */
	if (!candidate || !cpu_isset(candidate->cpu, *cluster->cpu_map))
		return NULL;

	return candidate;
}
#endif
285
286
287/* check for any necessary preemptions */
288static void check_for_preemptions(cedf_domain_t *cluster)
289{
290 struct task_struct *task;
291 cpu_entry_t *last;
292
293#ifdef CONFIG_PREFER_LOCAL_LINKING
294 cpu_entry_t *local;
295
296 /* Before linking to other CPUs, check first whether the local CPU is
297 * idle. */
298 local = &__get_cpu_var(cedf_cpu_entries);
299 task = __peek_ready(&cluster->domain);
300
301 if (task && !local->linked
302#ifdef CONFIG_RELEASE_MASTER
303 && likely(local->cpu != cluster->domain.release_master)
304#endif
305 ) {
306 task = __take_ready(&cluster->domain);
307 TRACE_TASK(task, "linking to local CPU %d to avoid IPI\n", local->cpu);
308 link_task_to_cpu(task, local);
309 preempt(local);
310 }
311#endif
312
313
314 for(last = lowest_prio_cpu(cluster);
315 edf_preemption_needed(&cluster->domain, last->linked);
316 last = lowest_prio_cpu(cluster)) {
317 /* preemption necessary */
318 task = __take_ready(&cluster->domain);
319 TRACE("check_for_preemptions: attempting to link task %d to %d\n",
320 task->pid, last->cpu);
321#ifdef CONFIG_SCHED_CPU_AFFINITY
322 {
323 cpu_entry_t *affinity =
324 cedf_get_nearest_available_cpu(cluster,
325 &per_cpu(cedf_cpu_entries, task_cpu(task)));
326 if(affinity)
327 last = affinity;
328 else if(requeue_preempted_job(last->linked))
329 requeue(last->linked);
330 }
331#else
332 if (requeue_preempted_job(last->linked))
333 requeue(last->linked);
334#endif
335 link_task_to_cpu(task, last);
336 preempt(last);
337 }
338}
339
340/* cedf_job_arrival: task is either resumed or released */
341static noinline void cedf_job_arrival(struct task_struct* task)
342{
343 cedf_domain_t *cluster = task_cpu_cluster(task);
344 BUG_ON(!task);
345
346 requeue(task);
347 check_for_preemptions(cluster);
348}
349
350static void cedf_release_jobs(rt_domain_t* rt, struct bheap* tasks)
351{
352 cedf_domain_t* cluster = container_of(rt, cedf_domain_t, domain);
353 unsigned long flags;
354
355 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
356
357 __merge_ready(&cluster->domain, tasks);
358 check_for_preemptions(cluster);
359
360 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
361}
362
363/* caller holds cedf_lock */
364static noinline void job_completion(struct task_struct *t, int forced)
365{
366 BUG_ON(!t);
367
368 sched_trace_task_completion(t, forced);
369
370 TRACE_TASK(t, "job_completion().\n");
371
372 /* set flags */
373 tsk_rt(t)->completed = 0;
374 /* prepare for next period */
375 prepare_for_next_period(t);
376 if (is_early_releasing(t) || is_released(t, litmus_clock()))
377 sched_trace_task_release(t);
378 /* unlink */
379 unlink(t);
380 /* requeue
381 * But don't requeue a blocking task. */
382 if (is_running(t))
383 cedf_job_arrival(t);
384}
385
386/* Getting schedule() right is a bit tricky. schedule() may not make any
387 * assumptions on the state of the current task since it may be called for a
388 * number of reasons. The reasons include a scheduler_tick() determined that it
389 * was necessary, because sys_exit_np() was called, because some Linux
390 * subsystem determined so, or even (in the worst case) because there is a bug
391 * hidden somewhere. Thus, we must take extreme care to determine what the
392 * current state is.
393 *
394 * The CPU could currently be scheduling a task (or not), be linked (or not).
395 *
396 * The following assertions for the scheduled task could hold:
397 *
398 * - !is_running(scheduled) // the job blocks
399 * - scheduled->timeslice == 0 // the job completed (forcefully)
400 * - is_completed() // the job completed (by syscall)
401 * - linked != scheduled // we need to reschedule (for any reason)
402 * - is_np(scheduled) // rescheduling must be delayed,
403 * sys_exit_np must be requested
404 *
405 * Any of these can occur together.
406 */
407static struct task_struct* cedf_schedule(struct task_struct * prev)
408{
409 cpu_entry_t* entry = &__get_cpu_var(cedf_cpu_entries);
410 cedf_domain_t *cluster = entry->cluster;
411 int out_of_time, sleep, preempt, np, exists, blocks;
412 struct task_struct* next = NULL;
413
414#ifdef CONFIG_RELEASE_MASTER
415 /* Bail out early if we are the release master.
416 * The release master never schedules any real-time tasks.
417 */
418 if (unlikely(cluster->domain.release_master == entry->cpu)) {
419 sched_state_task_picked();
420 return NULL;
421 }
422#endif
423
424 raw_spin_lock(&cluster->cluster_lock);
425 clear_will_schedule();
426
427 /* sanity checking */
428 BUG_ON(entry->scheduled && entry->scheduled != prev);
429 BUG_ON(entry->scheduled && !is_realtime(prev));
430 BUG_ON(is_realtime(prev) && !entry->scheduled);
431
432 /* (0) Determine state */
433 exists = entry->scheduled != NULL;
434 blocks = exists && !is_running(entry->scheduled);
435 out_of_time = exists &&
436 budget_enforced(entry->scheduled) &&
437 budget_exhausted(entry->scheduled);
438 np = exists && is_np(entry->scheduled);
439 sleep = exists && is_completed(entry->scheduled);
440 preempt = entry->scheduled != entry->linked;
441
442#ifdef WANT_ALL_SCHED_EVENTS
443 TRACE_TASK(prev, "invoked cedf_schedule.\n");
444#endif
445
446 if (exists)
447 TRACE_TASK(prev,
448 "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d "
449 "state:%d sig:%d\n",
450 blocks, out_of_time, np, sleep, preempt,
451 prev->state, signal_pending(prev));
452 if (entry->linked && preempt)
453 TRACE_TASK(prev, "will be preempted by %s/%d\n",
454 entry->linked->comm, entry->linked->pid);
455
456
457 /* If a task blocks we have no choice but to reschedule.
458 */
459 if (blocks)
460 unlink(entry->scheduled);
461
462 /* Request a sys_exit_np() call if we would like to preempt but cannot.
463 * We need to make sure to update the link structure anyway in case
464 * that we are still linked. Multiple calls to request_exit_np() don't
465 * hurt.
466 */
467 if (np && (out_of_time || preempt || sleep)) {
468 unlink(entry->scheduled);
469 request_exit_np(entry->scheduled);
470 }
471
472 /* Any task that is preemptable and either exhausts its execution
473 * budget or wants to sleep completes. We may have to reschedule after
474 * this. Don't do a job completion if we block (can't have timers running
475 * for blocked jobs).
476 */
477 if (!np && (out_of_time || sleep) && !blocks)
478 job_completion(entry->scheduled, !sleep);
479
480 /* Link pending task if we became unlinked.
481 */
482 if (!entry->linked)
483 link_task_to_cpu(__take_ready(&cluster->domain), entry);
484
485 /* The final scheduling decision. Do we need to switch for some reason?
486 * If linked is different from scheduled, then select linked as next.
487 */
488 if ((!np || blocks) &&
489 entry->linked != entry->scheduled) {
490 /* Schedule a linked job? */
491 if (entry->linked) {
492 entry->linked->rt_param.scheduled_on = entry->cpu;
493 next = entry->linked;
494 }
495 if (entry->scheduled) {
496 /* not gonna be scheduled soon */
497 entry->scheduled->rt_param.scheduled_on = NO_CPU;
498 TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
499 }
500 } else
501 /* Only override Linux scheduler if we have a real-time task
502 * scheduled that needs to continue.
503 */
504 if (exists)
505 next = prev;
506
507 sched_state_task_picked();
508 raw_spin_unlock(&cluster->cluster_lock);
509
510#ifdef WANT_ALL_SCHED_EVENTS
511 TRACE("cedf_lock released, next=0x%p\n", next);
512
513 if (next)
514 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
515 else if (exists && !next)
516 TRACE("becomes idle at %llu.\n", litmus_clock());
517#endif
518
519
520 return next;
521}
522
523
524/* _finish_switch - we just finished the switch away from prev
525 */
526static void cedf_finish_switch(struct task_struct *prev)
527{
528 cpu_entry_t* entry = &__get_cpu_var(cedf_cpu_entries);
529
530 entry->scheduled = is_realtime(current) ? current : NULL;
531#ifdef WANT_ALL_SCHED_EVENTS
532 TRACE_TASK(prev, "switched away from\n");
533#endif
534}
535
536
537/* Prepare a task for running in RT mode
538 */
539static void cedf_task_new(struct task_struct * t, int on_rq, int is_scheduled)
540{
541 unsigned long flags;
542 cpu_entry_t* entry;
543 cedf_domain_t* cluster;
544
545 TRACE("gsn edf: task new %d\n", t->pid);
546
547 /* the cluster doesn't change even if t is scheduled */
548 cluster = task_cpu_cluster(t);
549
550 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
551
552 /* setup job params */
553 release_at(t, litmus_clock());
554
555 if (is_scheduled) {
556 entry = &per_cpu(cedf_cpu_entries, task_cpu(t));
557 BUG_ON(entry->scheduled);
558
559#ifdef CONFIG_RELEASE_MASTER
560 if (entry->cpu != cluster->domain.release_master) {
561#endif
562 entry->scheduled = t;
563 tsk_rt(t)->scheduled_on = task_cpu(t);
564#ifdef CONFIG_RELEASE_MASTER
565 } else {
566 /* do not schedule on release master */
567 preempt(entry); /* force resched */
568 tsk_rt(t)->scheduled_on = NO_CPU;
569 }
570#endif
571 } else {
572 t->rt_param.scheduled_on = NO_CPU;
573 }
574 t->rt_param.linked_on = NO_CPU;
575
576 if (is_running(t))
577 cedf_job_arrival(t);
578 raw_spin_unlock_irqrestore(&(cluster->cluster_lock), flags);
579}
580
581static void cedf_task_wake_up(struct task_struct *task)
582{
583 unsigned long flags;
584 lt_t now;
585 cedf_domain_t *cluster;
586
587 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
588
589 cluster = task_cpu_cluster(task);
590
591 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
592 now = litmus_clock();
593 if (is_sporadic(task) && is_tardy(task, now)) {
594 /* new sporadic release */
595 release_at(task, now);
596 sched_trace_task_release(task);
597 }
598 cedf_job_arrival(task);
599 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
600}
601
602static void cedf_task_block(struct task_struct *t)
603{
604 unsigned long flags;
605 cedf_domain_t *cluster;
606
607 TRACE_TASK(t, "block at %llu\n", litmus_clock());
608
609 cluster = task_cpu_cluster(t);
610
611 /* unlink if necessary */
612 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
613 unlink(t);
614 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
615
616 BUG_ON(!is_realtime(t));
617}
618
619
620static void cedf_task_exit(struct task_struct * t)
621{
622 unsigned long flags;
623 cedf_domain_t *cluster = task_cpu_cluster(t);
624
625 /* unlink if necessary */
626 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
627 unlink(t);
628 if (tsk_rt(t)->scheduled_on != NO_CPU) {
629 cpu_entry_t *cpu;
630 cpu = &per_cpu(cedf_cpu_entries, tsk_rt(t)->scheduled_on);
631 cpu->scheduled = NULL;
632 tsk_rt(t)->scheduled_on = NO_CPU;
633 }
634 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
635
636 BUG_ON(!is_realtime(t));
637 TRACE_TASK(t, "RIP\n");
638}
639
640static long cedf_admit_task(struct task_struct* tsk)
641{
642 return (remote_cluster(task_cpu(tsk)) == task_cpu_cluster(tsk)) ?
643 0 : -EINVAL;
644}
645
/* how many clusters there are in total */
static int num_clusters;
/* number of cpus per cluster; clusters of different sizes are not supported */
static unsigned int cluster_size;
650
#ifdef VERBOSE_INIT
/* print_cluster_topology - log the list of cpus in @mask as the cpus shared
 * with @cpu.
 */
static void print_cluster_topology(cpumask_var_t mask, int cpu)
{
	char buf[255];
	int len = cpulist_scnprintf(buf, 254, mask);

	buf[len] = '\0';
	printk(KERN_INFO "CPU = %d, shared cpu(s) = %s\n", cpu, buf);
}
#endif
663
664static int clusters_allocated = 0;
665
666static void cleanup_cedf(void)
667{
668 int i;
669
670 if (clusters_allocated) {
671 for (i = 0; i < num_clusters; i++) {
672 kfree(cedf[i].cpus);
673 kfree(cedf[i].heap_node);
674 free_cpumask_var(cedf[i].cpu_map);
675 }
676
677 kfree(cedf);
678 }
679}
680
681static struct domain_proc_info cedf_domain_proc_info;
682static long cedf_get_domain_proc_info(struct domain_proc_info **ret)
683{
684 *ret = &cedf_domain_proc_info;
685 return 0;
686}
687
688static void cedf_setup_domain_proc(void)
689{
690 int i, cpu, domain;
691#ifdef CONFIG_RELEASE_MASTER
692 int release_master = atomic_read(&release_master_cpu);
693 /* skip over the domain with the release master if cluster size is 1 */
694 int skip_domain = (1 == cluster_size && release_master != NO_CPU) ?
695 release_master : NO_CPU;
696#else
697 int release_master = NO_CPU;
698 int skip_domain = NO_CPU;
699#endif
700 int num_rt_cpus = num_online_cpus() - (release_master != NO_CPU);
701 int num_rt_domains = num_clusters - (skip_domain != NO_CPU);
702 struct cd_mapping *map;
703
704 memset(&cedf_domain_proc_info, sizeof(cedf_domain_proc_info), 0);
705 init_domain_proc_info(&cedf_domain_proc_info, num_rt_cpus, num_rt_domains);
706 cedf_domain_proc_info.num_cpus = num_rt_cpus;
707 cedf_domain_proc_info.num_domains = num_rt_domains;
708
709 for (cpu = 0, i = 0; cpu < num_online_cpus(); ++cpu) {
710 if (cpu == release_master)
711 continue;
712 map = &cedf_domain_proc_info.cpu_to_domains[i];
713 /* pointer math to figure out the domain index */
714 domain = remote_cluster(cpu) - cedf;
715 map->id = cpu;
716 cpumask_set_cpu(domain, map->mask);
717 ++i;
718 }
719
720 for (domain = 0, i = 0; domain < num_clusters; ++domain) {
721 if (domain == skip_domain)
722 continue;
723 map = &cedf_domain_proc_info.domain_to_cpus[i];
724 map->id = i;
725 cpumask_copy(map->mask, cedf[domain].cpu_map);
726 ++i;
727 }
728}
729
730static long cedf_activate_plugin(void)
731{
732 int i, j, cpu, ccpu, cpu_count;
733 cpu_entry_t *entry;
734
735 cpumask_var_t mask;
736 int chk = 0;
737
738 /* de-allocate old clusters, if any */
739 cleanup_cedf();
740
741 printk(KERN_INFO "C-EDF: Activate Plugin, cluster configuration = %d\n",
742 cluster_config);
743
744 /* need to get cluster_size first */
745 if(!zalloc_cpumask_var(&mask, GFP_ATOMIC))
746 return -ENOMEM;
747
748 if (cluster_config == GLOBAL_CLUSTER) {
749 cluster_size = num_online_cpus();
750 } else {
751 chk = get_shared_cpu_map(mask, 0, cluster_config);
752 if (chk) {
753 /* if chk != 0 then it is the max allowed index */
754 printk(KERN_INFO "C-EDF: Cluster configuration = %d "
755 "is not supported on this hardware.\n",
756 cluster_config);
757 /* User should notice that the configuration failed, so
758 * let's bail out. */
759 return -EINVAL;
760 }
761
762 cluster_size = cpumask_weight(mask);
763 }
764
765 if ((num_online_cpus() % cluster_size) != 0) {
766 /* this can't be right, some cpus are left out */
767 printk(KERN_ERR "C-EDF: Trying to group %d cpus in %d!\n",
768 num_online_cpus(), cluster_size);
769 return -1;
770 }
771
772 num_clusters = num_online_cpus() / cluster_size;
773 printk(KERN_INFO "C-EDF: %d cluster(s) of size = %d\n",
774 num_clusters, cluster_size);
775
776 /* initialize clusters */
777 cedf = kmalloc(num_clusters * sizeof(cedf_domain_t), GFP_ATOMIC);
778 for (i = 0; i < num_clusters; i++) {
779
780 cedf[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t),
781 GFP_ATOMIC);
782 cedf[i].heap_node = kmalloc(
783 cluster_size * sizeof(struct bheap_node),
784 GFP_ATOMIC);
785 bheap_init(&(cedf[i].cpu_heap));
786 edf_domain_init(&(cedf[i].domain), NULL, cedf_release_jobs);
787
788 if(!zalloc_cpumask_var(&cedf[i].cpu_map, GFP_ATOMIC))
789 return -ENOMEM;
790#ifdef CONFIG_RELEASE_MASTER
791 cedf[i].domain.release_master = atomic_read(&release_master_cpu);
792#endif
793 }
794
795 /* cycle through cluster and add cpus to them */
796 for (i = 0; i < num_clusters; i++) {
797
798 for_each_online_cpu(cpu) {
799 /* check if the cpu is already in a cluster */
800 for (j = 0; j < num_clusters; j++)
801 if (cpumask_test_cpu(cpu, cedf[j].cpu_map))
802 break;
803 /* if it is in a cluster go to next cpu */
804 if (j < num_clusters &&
805 cpumask_test_cpu(cpu, cedf[j].cpu_map))
806 continue;
807
808 /* this cpu isn't in any cluster */
809 /* get the shared cpus */
810 if (unlikely(cluster_config == GLOBAL_CLUSTER))
811 cpumask_copy(mask, cpu_online_mask);
812 else
813 get_shared_cpu_map(mask, cpu, cluster_config);
814
815 cpumask_copy(cedf[i].cpu_map, mask);
816#ifdef VERBOSE_INIT
817 print_cluster_topology(mask, cpu);
818#endif
819 /* add cpus to current cluster and init cpu_entry_t */
820 cpu_count = 0;
821 for_each_cpu(ccpu, cedf[i].cpu_map) {
822
823 entry = &per_cpu(cedf_cpu_entries, ccpu);
824 cedf[i].cpus[cpu_count] = entry;
825 atomic_set(&entry->will_schedule, 0);
826 entry->cpu = ccpu;
827 entry->cluster = &cedf[i];
828 entry->hn = &(cedf[i].heap_node[cpu_count]);
829 bheap_node_init(&entry->hn, entry);
830
831 cpu_count++;
832
833 entry->linked = NULL;
834 entry->scheduled = NULL;
835#ifdef CONFIG_RELEASE_MASTER
836 /* only add CPUs that should schedule jobs */
837 if (entry->cpu != entry->cluster->domain.release_master)
838#endif
839 update_cpu_position(entry);
840 }
841 /* done with this cluster */
842 break;
843 }
844 }
845
846 clusters_allocated = 1;
847 free_cpumask_var(mask);
848
849 cedf_setup_domain_proc();
850
851 return 0;
852}
853
854static long cedf_deactivate_plugin(void)
855{
856 destroy_domain_proc_info(&cedf_domain_proc_info);
857 return 0;
858}
859
860/* Plugin object */
861static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = {
862 .plugin_name = "C-EDF",
863 .finish_switch = cedf_finish_switch,
864 .task_new = cedf_task_new,
865 .complete_job = complete_job,
866 .task_exit = cedf_task_exit,
867 .schedule = cedf_schedule,
868 .task_wake_up = cedf_task_wake_up,
869 .task_block = cedf_task_block,
870 .admit_task = cedf_admit_task,
871 .activate_plugin = cedf_activate_plugin,
872 .deactivate_plugin = cedf_deactivate_plugin,
873 .get_domain_proc_info = cedf_get_domain_proc_info,
874};
875
876static struct proc_dir_entry *cluster_file = NULL, *cedf_dir = NULL;
877
878static int __init init_cedf(void)
879{
880 int err, fs;
881
882 err = register_sched_plugin(&cedf_plugin);
883 if (!err) {
884 fs = make_plugin_proc_dir(&cedf_plugin, &cedf_dir);
885 if (!fs)
886 cluster_file = create_cluster_file(cedf_dir, &cluster_config);
887 else
888 printk(KERN_ERR "Could not allocate C-EDF procfs dir.\n");
889 }
890 return err;
891}
892
893static void clean_cedf(void)
894{
895 cleanup_cedf();
896 if (cluster_file)
897 remove_proc_entry("cluster", cedf_dir);
898 if (cedf_dir)
899 remove_plugin_proc_dir(&cedf_plugin);
900}
901
902module_init(init_cedf);
903module_exit(clean_cedf);