-rw-r--r--  litmus/Kconfig      |  10
-rw-r--r--  litmus/Makefile     |   2
-rw-r--r--  litmus/sched_cedf.c | 890
3 files changed, 901 insertions(+), 1 deletion(-)
diff --git a/litmus/Kconfig b/litmus/Kconfig
index fdf31f3dd6c2..38d9e433b345 100644
--- a/litmus/Kconfig
+++ b/litmus/Kconfig
@@ -2,6 +2,16 @@ menu "LITMUS^RT"
 
 menu "Scheduling"
 
+config PLUGIN_CEDF
+	bool "Clustered-EDF"
+	depends on X86 && SYSFS
+	default y
+	help
+	  Include the Clustered EDF (C-EDF) plugin in the kernel.
+	  This is appropriate for large platforms with shared caches.
+	  On smaller platforms (e.g., ARM PB11MPCore), using C-EDF
+	  makes little sense since there aren't any shared caches.
+
 config RELEASE_MASTER
 	bool "Release-master Support"
 	depends on ARCH_HAS_SEND_PULL_TIMERS && SMP
diff --git a/litmus/Makefile b/litmus/Makefile
index fb12398c4b92..7d637197d736 100644
--- a/litmus/Makefile
+++ b/litmus/Makefile
@@ -23,7 +23,7 @@ obj-y = sched_plugin.o litmus.o \
 	   sched_psn_edf.o \
 	   sched_pfp.o
 
-
+obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o
 
 obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o
 obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o
diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c
new file mode 100644
index 000000000000..528efb611fdb
--- /dev/null
+++ b/litmus/sched_cedf.c
@@ -0,0 +1,890 @@
/*
 * litmus/sched_cedf.c
 *
 * Implementation of the C-EDF scheduling algorithm.
 *
 * This implementation is based on G-EDF:
 * - CPUs are clustered around L2 or L3 caches.
 * - The cluster topology is detected automatically (this is arch-dependent
 *   and currently works only on x86, and only with modern CPUs that export
 *   cpuid4 information).
 * - The plugin _does not_ attempt to put tasks in the right cluster, i.e.,
 *   the programmer needs to be aware of the topology to place tasks
 *   in the desired cluster.
 * - The default clustering is GLOBAL_CLUSTER (see cluster_config below),
 *   i.e., all online CPUs are placed in a single cluster. Supported cluster
 *   levels are: L1 (private caches, effectively P-EDF), L2, L3, and ALL.
 *
 * For details on the individual functions, take a look at sched_gsn_edf.c.
 *
 * Currently, we do not support changes in the number of online CPUs.
 * If num_online_cpus() changes dynamically, the plugin is broken.
 *
 * This version uses the simple approach and serializes all scheduling
 * decisions by the use of a queue lock. This is probably not the
 * best way to do it, but it should suffice for now.
 */
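
/*
 * Worked example (hypothetical topology, for illustration only): on a
 * two-socket machine with eight CPUs, private per-core L2 caches, and one
 * L3 cache per socket, clustering at the L3 level yields two clusters of
 * four CPUs each (one per socket); clustering at the L2 level degenerates
 * into eight single-CPU clusters (effectively P-EDF); and GLOBAL_CLUSTER
 * yields a single eight-CPU cluster (effectively G-EDF).
 */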

#include <linux/spinlock.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/slab.h>

#include <linux/module.h>

#include <litmus/litmus.h>
#include <litmus/jobs.h>
#include <litmus/preempt.h>
#include <litmus/budget.h>
#include <litmus/sched_plugin.h>
#include <litmus/edf_common.h>
#include <litmus/sched_trace.h>

#include <litmus/clustered.h>

#include <litmus/bheap.h>

#ifdef CONFIG_SCHED_CPU_AFFINITY
#include <litmus/affinity.h>
#endif

/* to configure the cluster size */
#include <litmus/litmus_proc.h>
#include <linux/uaccess.h>

/* Reference configuration variable. Determines which cache level is used to
 * group CPUs into clusters. GLOBAL_CLUSTER, which is the default, means that
 * all CPUs form a single cluster (just like GSN-EDF).
 */
static enum cache_level cluster_config = GLOBAL_CLUSTER;

struct clusterdomain;

/* cpu_entry_t - maintain the linked and scheduled state
 *
 * Each CPU entry also contains a pointer to the cedf_domain_t cluster
 * that owns it (struct clusterdomain *).
 */
typedef struct {
	int			cpu;
	struct clusterdomain*	cluster;	/* owning cluster */
	struct task_struct*	linked;		/* only RT tasks */
	struct task_struct*	scheduled;	/* only RT tasks */
	atomic_t		will_schedule;	/* prevent unneeded IPIs */
	struct bheap_node*	hn;
} cpu_entry_t;

/* one cpu_entry_t per CPU */
DEFINE_PER_CPU(cpu_entry_t, cedf_cpu_entries);

/*
 * In C-EDF there is a cedf domain _per_ cluster.
 * The number of clusters is determined dynamically according to the
 * total number of CPUs and the cluster size.
 */
typedef struct clusterdomain {
	/* rt_domain for this cluster */
	rt_domain_t		domain;
	/* cpus in this cluster */
	cpu_entry_t		**cpus;
	/* map of this cluster cpus */
	cpumask_var_t		cpu_map;
	/* the cpus queue themselves according to priority in here */
	struct bheap_node	*heap_node;
	struct bheap		cpu_heap;
	/* lock for this cluster */
#define cluster_lock domain.ready_lock
} cedf_domain_t;

/* a cedf_domain per cluster; allocation is done at init/activation time */
cedf_domain_t *cedf;

#define remote_cluster(cpu)	((cedf_domain_t *) per_cpu(cedf_cpu_entries, cpu).cluster)
#define task_cpu_cluster(task)	remote_cluster(get_partition(task))

/* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling
 * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose
 * information during the initialization of the plugin (e.g., topology)
#define WANT_ALL_SCHED_EVENTS
 */
#define VERBOSE_INIT

static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b)
{
	cpu_entry_t *a, *b;
	a = _a->value;
	b = _b->value;
	/* Note that a and b are inverted: we want the lowest-priority CPU at
	 * the top of the heap.
	 */
	return edf_higher_prio(b->linked, a->linked);
}

/* update_cpu_position - Move the cpu entry to the correct place to maintain
 * order in the cpu queue. Caller must hold cedf lock.
 */
static void update_cpu_position(cpu_entry_t *entry)
{
	cedf_domain_t *cluster = entry->cluster;

	if (likely(bheap_node_in_heap(entry->hn)))
		bheap_delete(cpu_lower_prio,
				&cluster->cpu_heap,
				entry->hn);

	bheap_insert(cpu_lower_prio, &cluster->cpu_heap, entry->hn);
}

/* caller must hold cedf lock */
static cpu_entry_t* lowest_prio_cpu(cedf_domain_t *cluster)
{
	struct bheap_node* hn;
	hn = bheap_peek(cpu_lower_prio, &cluster->cpu_heap);
	return hn->value;
}


/* link_task_to_cpu - Update the link of a CPU.
 * Handles the case where the to-be-linked task is already
 * scheduled on a different CPU.
 */
static noinline void link_task_to_cpu(struct task_struct* linked,
				      cpu_entry_t *entry)
{
	cpu_entry_t *sched;
	struct task_struct* tmp;
	int on_cpu;

	BUG_ON(linked && !is_realtime(linked));

	/* Currently linked task is set to be unlinked. */
	if (entry->linked) {
		entry->linked->rt_param.linked_on = NO_CPU;
	}

	/* Link new task to CPU. */
	if (linked) {
		/* handle the case where the task is already scheduled
		 * somewhere else! */
		on_cpu = linked->rt_param.scheduled_on;
		if (on_cpu != NO_CPU) {
			sched = &per_cpu(cedf_cpu_entries, on_cpu);
			/* this should only happen if not linked already */
			BUG_ON(sched->linked == linked);

			/* If we are already scheduled on the CPU to which we
			 * wanted to link, we don't need to do the swap --
			 * we just link ourselves to the CPU and depend on
			 * the caller to get things right.
			 */
			if (entry != sched) {
				TRACE_TASK(linked,
					   "already scheduled on %d, updating link.\n",
					   sched->cpu);
				tmp = sched->linked;
				linked->rt_param.linked_on = sched->cpu;
				sched->linked = linked;
				update_cpu_position(sched);
				linked = tmp;
			}
		}
		if (linked) /* might be NULL due to swap */
			linked->rt_param.linked_on = entry->cpu;
	}
	entry->linked = linked;
#ifdef WANT_ALL_SCHED_EVENTS
	if (linked)
		TRACE_TASK(linked, "linked to %d.\n", entry->cpu);
	else
		TRACE("NULL linked to %d.\n", entry->cpu);
#endif
	update_cpu_position(entry);
}

/* unlink - Make sure a task is not linked any longer to an entry
 * where it was linked before. Must hold cedf_lock.
 */
static noinline void unlink(struct task_struct* t)
{
	cpu_entry_t *entry;

	if (t->rt_param.linked_on != NO_CPU) {
		/* unlink */
		entry = &per_cpu(cedf_cpu_entries, t->rt_param.linked_on);
		t->rt_param.linked_on = NO_CPU;
		link_task_to_cpu(NULL, entry);
	} else if (is_queued(t)) {
		/* This is an interesting situation: t is scheduled,
		 * but was just recently unlinked. It cannot be
		 * linked anywhere else (because then it would have
		 * been relinked to this CPU), thus it must be in some
		 * queue. We must remove it from the list in this
		 * case.
		 *
		 * In the C-EDF case it should be somewhere in the queue
		 * of its domain; we can get the domain via
		 * task_cpu_cluster().
		 */
		remove(&(task_cpu_cluster(t))->domain, t);
	}
}


/* preempt - force a CPU to reschedule
 */
static void preempt(cpu_entry_t *entry)
{
	preempt_if_preemptable(entry->scheduled, entry->cpu);
}

/* requeue - Put an unlinked task back into its cedf domain.
 * Caller must hold cedf_lock.
 */
static noinline void requeue(struct task_struct* task)
{
	cedf_domain_t *cluster;

	/* sanity checks before insertion */
	BUG_ON(!task);
	BUG_ON(is_queued(task));

	cluster = task_cpu_cluster(task);

	if (is_early_releasing(task) || is_released(task, litmus_clock()))
		__add_ready(&cluster->domain, task);
	else {
		/* it has got to wait */
		add_release(&cluster->domain, task);
	}
}

#ifdef CONFIG_SCHED_CPU_AFFINITY
static cpu_entry_t* cedf_get_nearest_available_cpu(
				cedf_domain_t *cluster, cpu_entry_t *start)
{
	cpu_entry_t *affinity;

	get_nearest_available_cpu(affinity, start, cedf_cpu_entries,
#ifdef CONFIG_RELEASE_MASTER
		cluster->domain.release_master,
#else
		NO_CPU,
#endif
		cluster->cpu_map);

	/* make sure CPU is in our cluster */
	if (affinity && cpumask_test_cpu(affinity->cpu, cluster->cpu_map))
		return affinity;
	else
		return NULL;
}
#endif


/* check for any necessary preemptions */
static void check_for_preemptions(cedf_domain_t *cluster)
{
	struct task_struct *task;
	cpu_entry_t *last;

#ifdef CONFIG_PREFER_LOCAL_LINKING
	cpu_entry_t *local;

	/* Before linking to other CPUs, check first whether the local CPU is
	 * idle. */
	local = this_cpu_ptr(&cedf_cpu_entries);
	task  = __peek_ready(&cluster->domain);

	if (task && !local->linked
#ifdef CONFIG_RELEASE_MASTER
	    && likely(local->cpu != cluster->domain.release_master)
#endif
		) {
		task = __take_ready(&cluster->domain);
		TRACE_TASK(task, "linking to local CPU %d to avoid IPI\n", local->cpu);
		link_task_to_cpu(task, local);
		preempt(local);
	}
#endif


	for (last = lowest_prio_cpu(cluster);
	     edf_preemption_needed(&cluster->domain, last->linked);
	     last = lowest_prio_cpu(cluster)) {
		/* preemption necessary */
		task = __take_ready(&cluster->domain);
		TRACE("check_for_preemptions: attempting to link task %d to %d\n",
		      task->pid, last->cpu);
#ifdef CONFIG_SCHED_CPU_AFFINITY
		{
			cpu_entry_t *affinity =
				cedf_get_nearest_available_cpu(cluster,
					&per_cpu(cedf_cpu_entries, task_cpu(task)));
			if (affinity)
				last = affinity;
			else if (requeue_preempted_job(last->linked))
				requeue(last->linked);
		}
#else
		if (requeue_preempted_job(last->linked))
			requeue(last->linked);
#endif
		link_task_to_cpu(task, last);
		preempt(last);
	}
}

/* cedf_job_arrival: task is either resumed or released */
static noinline void cedf_job_arrival(struct task_struct* task)
{
	cedf_domain_t *cluster;

	BUG_ON(!task);
	cluster = task_cpu_cluster(task);

	requeue(task);
	check_for_preemptions(cluster);
}

static void cedf_release_jobs(rt_domain_t* rt, struct bheap* tasks)
{
	cedf_domain_t* cluster = container_of(rt, cedf_domain_t, domain);
	unsigned long flags;

	raw_spin_lock_irqsave(&cluster->cluster_lock, flags);

	__merge_ready(&cluster->domain, tasks);
	check_for_preemptions(cluster);

	raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
}

/* caller holds cedf_lock */
static noinline void current_job_completion(int forced)
{
	struct task_struct *t = current;

	sched_trace_task_completion(t, forced);

	TRACE_TASK(t, "job_completion(forced=%d).\n", forced);

	/* clear the completion flag */
	tsk_rt(t)->completed = 0;
	/* prepare for next period */
	prepare_for_next_period(t);
	if (is_early_releasing(t) || is_released(t, litmus_clock()))
		sched_trace_task_release(t);
	/* unlink */
	unlink(t);
	/* requeue, but don't requeue a blocking task */
	if (is_current_running())
		cedf_job_arrival(t);
}

/* Getting schedule() right is a bit tricky. schedule() may not make any
 * assumptions on the state of the current task since it may be called for a
 * number of reasons. The reasons include: a scheduler_tick() determined that
 * it was necessary, sys_exit_np() was called, some Linux subsystem decided
 * so, or even (in the worst case) there is a bug hidden somewhere. Thus, we
 * must take extreme care to determine what the current state is.
 *
 * The CPU could currently be scheduling a task (or not) and be linked (or not).
 *
 * The following assertions for the scheduled task could hold:
 *
 *      - !is_running(scheduled)        // the job blocks
 *      - scheduled->timeslice == 0     // the job completed (forcefully)
 *      - is_completed()                // the job completed (by syscall)
 *      - linked != scheduled           // we need to reschedule (for any reason)
 *      - is_np(scheduled)              // rescheduling must be delayed,
 *                                         sys_exit_np must be requested
 *
 * Any of these can occur together.
 */
static struct task_struct* cedf_schedule(struct task_struct * prev)
{
	cpu_entry_t* entry = this_cpu_ptr(&cedf_cpu_entries);
	cedf_domain_t *cluster = entry->cluster;
	int out_of_time, sleep, preempt, np, exists, blocks;
	struct task_struct* next = NULL;

#ifdef CONFIG_RELEASE_MASTER
	/* Bail out early if we are the release master.
	 * The release master never schedules any real-time tasks.
	 */
	if (unlikely(cluster->domain.release_master == entry->cpu)) {
		sched_state_task_picked();
		return NULL;
	}
#endif

	raw_spin_lock(&cluster->cluster_lock);

	/* sanity checking */
	BUG_ON(entry->scheduled && entry->scheduled != prev);
	BUG_ON(entry->scheduled && !is_realtime(prev));
	BUG_ON(is_realtime(prev) && !entry->scheduled);

	/* (0) Determine state */
	exists      = entry->scheduled != NULL;
	blocks      = exists && !is_current_running();
	out_of_time = exists && budget_enforced(entry->scheduled)
			     && budget_exhausted(entry->scheduled);
	np          = exists && is_np(entry->scheduled);
	sleep       = exists && is_completed(entry->scheduled);
	preempt     = entry->scheduled != entry->linked;

#ifdef WANT_ALL_SCHED_EVENTS
	TRACE_TASK(prev, "invoked cedf_schedule.\n");
#endif

	if (exists)
		TRACE_TASK(prev,
			   "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d "
			   "state:%d sig:%d\n",
			   blocks, out_of_time, np, sleep, preempt,
			   prev->state, signal_pending(prev));
	if (entry->linked && preempt)
		TRACE_TASK(prev, "will be preempted by %s/%d\n",
			   entry->linked->comm, entry->linked->pid);


	/* If a task blocks we have no choice but to reschedule.
	 */
	if (blocks)
		unlink(entry->scheduled);

	/* Request a sys_exit_np() call if we would like to preempt but cannot.
	 * We need to make sure to update the link structure anyway in case
	 * that we are still linked. Multiple calls to request_exit_np() don't
	 * hurt.
	 */
	if (np && (out_of_time || preempt || sleep)) {
		unlink(entry->scheduled);
		request_exit_np(entry->scheduled);
	}

	/* Any task that is preemptable and either exhausts its execution
	 * budget or wants to sleep completes. We may have to reschedule after
	 * this. Don't do a job completion if we block (can't have timers running
	 * for blocked jobs).
	 */
	if (!np && (out_of_time || sleep))
		current_job_completion(!sleep);

	/* Link pending task if we became unlinked.
	 */
	if (!entry->linked)
		link_task_to_cpu(__take_ready(&cluster->domain), entry);

	/* The final scheduling decision. Do we need to switch for some reason?
	 * If linked is different from scheduled, then select linked as next.
	 */
	if ((!np || blocks) &&
	    entry->linked != entry->scheduled) {
		/* Schedule a linked job? */
		if (entry->linked) {
			entry->linked->rt_param.scheduled_on = entry->cpu;
			next = entry->linked;
		}
		if (entry->scheduled) {
			/* not gonna be scheduled soon */
			entry->scheduled->rt_param.scheduled_on = NO_CPU;
			TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
		}
	} else
		/* Only override Linux scheduler if we have a real-time task
		 * scheduled that needs to continue.
		 */
		if (exists)
			next = prev;

	sched_state_task_picked();
	raw_spin_unlock(&cluster->cluster_lock);

#ifdef WANT_ALL_SCHED_EVENTS
	TRACE("cedf_lock released, next=0x%p\n", next);

	if (next)
		TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
	else if (exists && !next)
		TRACE("becomes idle at %llu.\n", litmus_clock());
#endif


	return next;
}


/* _finish_switch - we just finished the switch away from prev
 */
static void cedf_finish_switch(struct task_struct *prev)
{
	cpu_entry_t* entry = this_cpu_ptr(&cedf_cpu_entries);

	entry->scheduled = is_realtime(current) ? current : NULL;
#ifdef WANT_ALL_SCHED_EVENTS
	TRACE_TASK(prev, "switched away from\n");
#endif
}


/* Prepare a task for running in RT mode
 */
static void cedf_task_new(struct task_struct * t, int on_rq, int is_scheduled)
{
	unsigned long flags;
	cpu_entry_t* entry;
	cedf_domain_t* cluster;

	TRACE("C-EDF: task new %d\n", t->pid);

	/* the cluster doesn't change even if t is scheduled */
	cluster = task_cpu_cluster(t);

	raw_spin_lock_irqsave(&cluster->cluster_lock, flags);

	/* setup job params */
	release_at(t, litmus_clock());

	if (is_scheduled) {
		entry = &per_cpu(cedf_cpu_entries, task_cpu(t));
		BUG_ON(entry->scheduled);

#ifdef CONFIG_RELEASE_MASTER
		if (entry->cpu != cluster->domain.release_master) {
#endif
			entry->scheduled = t;
			tsk_rt(t)->scheduled_on = task_cpu(t);
#ifdef CONFIG_RELEASE_MASTER
		} else {
			/* do not schedule on release master */
			preempt(entry); /* force resched */
			tsk_rt(t)->scheduled_on = NO_CPU;
		}
#endif
	} else {
		t->rt_param.scheduled_on = NO_CPU;
	}
	t->rt_param.linked_on = NO_CPU;

	if (on_rq || is_scheduled)
		cedf_job_arrival(t);
	raw_spin_unlock_irqrestore(&(cluster->cluster_lock), flags);
}

static void cedf_task_wake_up(struct task_struct *task)
{
	unsigned long flags;
	lt_t now;
	cedf_domain_t *cluster;

	TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());

	cluster = task_cpu_cluster(task);

	raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
	now = litmus_clock();
	if (is_sporadic(task) && is_tardy(task, now)) {
		/* new sporadic release */
		release_at(task, now);
		sched_trace_task_release(task);
	}
	cedf_job_arrival(task);
	raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
}

static void cedf_task_block(struct task_struct *t)
{
	unsigned long flags;
	cedf_domain_t *cluster;

	TRACE_TASK(t, "block at %llu\n", litmus_clock());

	cluster = task_cpu_cluster(t);

	/* unlink if necessary */
	raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
	unlink(t);
	raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);

	BUG_ON(!is_realtime(t));
}


static void cedf_task_exit(struct task_struct * t)
{
	unsigned long flags;
	cedf_domain_t *cluster = task_cpu_cluster(t);

	/* unlink if necessary */
	raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
	unlink(t);
	if (tsk_rt(t)->scheduled_on != NO_CPU) {
		cpu_entry_t *cpu;
		cpu = &per_cpu(cedf_cpu_entries, tsk_rt(t)->scheduled_on);
		cpu->scheduled = NULL;
		tsk_rt(t)->scheduled_on = NO_CPU;
	}
	raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);

	BUG_ON(!is_realtime(t));
	TRACE_TASK(t, "RIP\n");
}

static long cedf_admit_task(struct task_struct* tsk)
{
	return (remote_cluster(task_cpu(tsk)) == task_cpu_cluster(tsk)) ?
			0 : -EINVAL;
}

/* total number of clusters */
static int num_clusters;
/* we do not support clusters of different sizes */
static unsigned int cluster_size;

#ifdef VERBOSE_INIT
static void print_cluster_topology(cpumask_var_t mask, int cpu)
{
	printk(KERN_INFO "CPU = %d, shared cpu(s) = %*pbl\n", cpu,
	       cpumask_pr_args(mask));

}
#endif

static int clusters_allocated = 0;

static void cleanup_cedf(void)
{
	int i;

	if (clusters_allocated) {
		for (i = 0; i < num_clusters; i++) {
			kfree(cedf[i].cpus);
			kfree(cedf[i].heap_node);
			free_cpumask_var(cedf[i].cpu_map);
		}

		kfree(cedf);
	}
}

static struct domain_proc_info cedf_domain_proc_info;
static long cedf_get_domain_proc_info(struct domain_proc_info **ret)
{
	*ret = &cedf_domain_proc_info;
	return 0;
}

static void cedf_setup_domain_proc(void)
{
	int i, cpu, domain;
#ifdef CONFIG_RELEASE_MASTER
	int release_master = atomic_read(&release_master_cpu);
	/* skip over the domain with the release master if cluster size is 1 */
	int skip_domain = (1 == cluster_size && release_master != NO_CPU) ?
			release_master : NO_CPU;
#else
	int release_master = NO_CPU;
	int skip_domain = NO_CPU;
#endif
	int num_rt_cpus = num_online_cpus() - (release_master != NO_CPU);
	int num_rt_domains = num_clusters - (skip_domain != NO_CPU);
	struct cd_mapping *map;

	memset(&cedf_domain_proc_info, 0, sizeof(cedf_domain_proc_info));
	init_domain_proc_info(&cedf_domain_proc_info, num_rt_cpus, num_rt_domains);
	cedf_domain_proc_info.num_cpus = num_rt_cpus;
	cedf_domain_proc_info.num_domains = num_rt_domains;

	for (cpu = 0, i = 0; cpu < num_online_cpus(); ++cpu) {
		if (cpu == release_master)
			continue;
		map = &cedf_domain_proc_info.cpu_to_domains[i];
		/* pointer math to figure out the domain index */
		domain = remote_cluster(cpu) - cedf;
		map->id = cpu;
		cpumask_set_cpu(domain, map->mask);
		++i;
	}

	for (domain = 0, i = 0; domain < num_clusters; ++domain) {
		if (domain == skip_domain)
			continue;
		map = &cedf_domain_proc_info.domain_to_cpus[i];
		map->id = i;
		cpumask_copy(map->mask, cedf[domain].cpu_map);
		++i;
	}
}

static long cedf_activate_plugin(void)
{
	int i, j, cpu, ccpu, cpu_count;
	cpu_entry_t *entry;

	cpumask_var_t mask;
	int chk = 0;

	/* de-allocate old clusters, if any */
	cleanup_cedf();

	printk(KERN_INFO "C-EDF: Activate Plugin, cluster configuration = %d\n",
	       cluster_config);

	/* need to get cluster_size first */
	if (!zalloc_cpumask_var(&mask, GFP_ATOMIC))
		return -ENOMEM;

	if (cluster_config == GLOBAL_CLUSTER) {
		cluster_size = num_online_cpus();
	} else {
		chk = get_shared_cpu_map(mask, 0, cluster_config);
		if (chk) {
			/* if chk != 0 then it is the max allowed index */
			printk(KERN_INFO "C-EDF: Cluster configuration = %d "
			       "is not supported on this hardware.\n",
			       cluster_config);
			/* User should notice that the configuration failed, so
			 * let's bail out. */
			return -EINVAL;
		}

		cluster_size = cpumask_weight(mask);
	}

	if ((num_online_cpus() % cluster_size) != 0) {
		/* this can't be right, some cpus are left out */
		printk(KERN_ERR "C-EDF: Trying to group %d cpus in %d!\n",
		       num_online_cpus(), cluster_size);
		return -1;
	}

	num_clusters = num_online_cpus() / cluster_size;
	printk(KERN_INFO "C-EDF: %d cluster(s) of size = %d\n",
	       num_clusters, cluster_size);

	/* initialize clusters */
	cedf = kmalloc(num_clusters * sizeof(cedf_domain_t), GFP_ATOMIC);
	for (i = 0; i < num_clusters; i++) {

		cedf[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t),
				       GFP_ATOMIC);
		cedf[i].heap_node = kmalloc(
				cluster_size * sizeof(struct bheap_node),
				GFP_ATOMIC);
		bheap_init(&(cedf[i].cpu_heap));
		edf_domain_init(&(cedf[i].domain), NULL, cedf_release_jobs);

		if (!zalloc_cpumask_var(&cedf[i].cpu_map, GFP_ATOMIC))
			return -ENOMEM;
#ifdef CONFIG_RELEASE_MASTER
		cedf[i].domain.release_master = atomic_read(&release_master_cpu);
#endif
	}

	/* cycle through the clusters and add the cpus to them */
	for (i = 0; i < num_clusters; i++) {

		for_each_online_cpu(cpu) {
			/* check if the cpu is already in a cluster */
			for (j = 0; j < num_clusters; j++)
				if (cpumask_test_cpu(cpu, cedf[j].cpu_map))
					break;
			/* if it is in a cluster go to next cpu */
			if (j < num_clusters &&
					cpumask_test_cpu(cpu, cedf[j].cpu_map))
				continue;

			/* this cpu isn't in any cluster */
			/* get the shared cpus */
			if (unlikely(cluster_config == GLOBAL_CLUSTER))
				cpumask_copy(mask, cpu_online_mask);
			else
				get_shared_cpu_map(mask, cpu, cluster_config);

			cpumask_copy(cedf[i].cpu_map, mask);
#ifdef VERBOSE_INIT
			print_cluster_topology(mask, cpu);
#endif
			/* add cpus to current cluster and init cpu_entry_t */
			cpu_count = 0;
			for_each_cpu(ccpu, cedf[i].cpu_map) {

				entry = &per_cpu(cedf_cpu_entries, ccpu);
				cedf[i].cpus[cpu_count] = entry;
				atomic_set(&entry->will_schedule, 0);
				entry->cpu = ccpu;
				entry->cluster = &cedf[i];
				entry->hn = &(cedf[i].heap_node[cpu_count]);
				bheap_node_init(&entry->hn, entry);

				cpu_count++;

				entry->linked = NULL;
				entry->scheduled = NULL;
#ifdef CONFIG_RELEASE_MASTER
				/* only add CPUs that should schedule jobs */
				if (entry->cpu != entry->cluster->domain.release_master)
#endif
					update_cpu_position(entry);
			}
			/* done with this cluster */
			break;
		}
	}

	clusters_allocated = 1;
	free_cpumask_var(mask);

	cedf_setup_domain_proc();

	return 0;
}

static long cedf_deactivate_plugin(void)
{
	destroy_domain_proc_info(&cedf_domain_proc_info);
	return 0;
}

/* Plugin object */
static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = {
	.plugin_name		= "C-EDF",
	.finish_switch		= cedf_finish_switch,
	.task_new		= cedf_task_new,
	.complete_job		= complete_job,
	.task_exit		= cedf_task_exit,
	.schedule		= cedf_schedule,
	.task_wake_up		= cedf_task_wake_up,
	.task_block		= cedf_task_block,
	.admit_task		= cedf_admit_task,
	.activate_plugin	= cedf_activate_plugin,
	.deactivate_plugin	= cedf_deactivate_plugin,
	.get_domain_proc_info	= cedf_get_domain_proc_info,
};

static struct proc_dir_entry *cluster_file = NULL, *cedf_dir = NULL;

static int __init init_cedf(void)
{
	int err, fs;

	err = register_sched_plugin(&cedf_plugin);
	if (!err) {
		fs = make_plugin_proc_dir(&cedf_plugin, &cedf_dir);
		if (!fs)
			cluster_file = create_cluster_file(cedf_dir, &cluster_config);
		else
			printk(KERN_ERR "Could not allocate C-EDF procfs dir.\n");
	}
	return err;
}

static void clean_cedf(void)
{
	cleanup_cedf();
	if (cluster_file)
		remove_proc_entry("cluster", cedf_dir);
	if (cedf_dir)
		remove_plugin_proc_dir(&cedf_plugin);
}

module_init(init_cedf);
module_exit(clean_cedf);
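
The cluster level is selected at runtime through the "cluster" proc file that
init_cedf() creates via make_plugin_proc_dir() and create_cluster_file(), and
it is consumed by cedf_activate_plugin() on the next switch to the C-EDF
plugin. Below is a minimal userspace sketch of setting that level; the proc
path (/proc/litmus/plugins/C-EDF/cluster) and the accepted level strings
("L2", "L3", "ALL") are assumptions about the surrounding LITMUS^RT proc
infrastructure, not something this patch itself defines.

/* set_cedf_cluster.c -- hypothetical helper, see assumptions above */
#include <stdio.h>

int main(void)
{
	/* path assumed from make_plugin_proc_dir()/create_cluster_file() */
	FILE *f = fopen("/proc/litmus/plugins/C-EDF/cluster", "w");

	if (!f) {
		perror("open C-EDF cluster file");
		return 1;
	}
	/* cluster around L3 caches; cedf_activate_plugin() re-reads
	 * cluster_config the next time the plugin is activated */
	if (fprintf(f, "L3\n") < 0) {
		perror("write cluster level");
		fclose(f);
		return 1;
	}
	return fclose(f) ? 1 : 0;
}

Switching plugins afterwards (e.g., with the liblitmus setsched tool) invokes
cedf_activate_plugin(), which re-reads cluster_config, detects the shared-cache
topology via get_shared_cpu_map(), and rebuilds the clusters.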