author    Felipe Cerqueira <felipec@mpi-sws.org>    2013-02-12 13:18:11 -0500
committer Bjoern Brandenburg <bbb@mpi-sws.org>      2013-08-07 03:47:06 -0400
commit    a6e8c66b436815b7a7abdfb38808fb94cc70006b (patch)
tree      4a7f621e7c97ab00b04aaa025f29f58eb5ee80aa
parent    6a24cb19fc7b6d534f9576567eae1b522c12a1b3 (diff)
Add C-EDF scheduler plugin
-rw-r--r--  litmus/Kconfig       |  10
-rw-r--r--  litmus/Makefile      |   2
-rw-r--r--  litmus/sched_cedf.c  | 850
3 files changed, 861 insertions(+), 1 deletion(-)
diff --git a/litmus/Kconfig b/litmus/Kconfig
index 32c18c6eb58d..c764857aec82 100644
--- a/litmus/Kconfig
+++ b/litmus/Kconfig
@@ -2,6 +2,16 @@ menu "LITMUS^RT"
 
 menu "Scheduling"
 
+config PLUGIN_CEDF
+	bool "Clustered-EDF"
+	depends on X86 && SYSFS
+	default y
+	help
+	  Include the Clustered EDF (C-EDF) plugin in the kernel.
+	  This is appropriate for large platforms with shared caches.
+	  On smaller platforms (e.g., ARM PB11MPCore), using C-EDF
+	  makes little sense since there aren't any shared caches.
+
 config RELEASE_MASTER
 	bool "Release-master Support"
 	depends on ARCH_HAS_SEND_PULL_TIMERS && SMP
diff --git a/litmus/Makefile b/litmus/Makefile
index 9757399238de..bcb007d9b592 100644
--- a/litmus/Makefile
+++ b/litmus/Makefile
@@ -22,7 +22,7 @@ obj-y = sched_plugin.o litmus.o \
 	  sched_psn_edf.o \
 	  sched_pfp.o
 
-
+obj-$(CONFIG_PLUGIN_CEDF)	+= sched_cedf.o
 obj-$(CONFIG_SCHED_CPU_AFFINITY) += affinity.o
 
 obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o
diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c
new file mode 100644
index 000000000000..7cb3cc07ed21
--- /dev/null
+++ b/litmus/sched_cedf.c
@@ -0,0 +1,850 @@
+/*
+ * litmus/sched_cedf.c
+ *
+ * Implementation of the C-EDF scheduling algorithm.
+ *
+ * This implementation is based on G-EDF:
+ * - CPUs are clustered around L2 or L3 caches.
+ * - The cluster topology is detected automatically (this is arch-dependent
+ *   and currently works only on x86 --- and only with modern CPUs that
+ *   export cpuid4 information).
+ * - The plugin _does not_ attempt to put tasks in the right cluster, i.e.,
+ *   the programmer needs to be aware of the topology to place tasks
+ *   in the desired cluster.
+ * - Default clustering is around the L2 cache (cache index = 2);
+ *   supported clusters are: L1 (private cache: pedf), L2, L3, ALL (all
+ *   online_cpus are placed in a single cluster).
+ *
+ * For details on functions, take a look at sched_gsn_edf.c.
+ *
+ * Currently, we do not support changes in the number of online cpus.
+ * If num_online_cpus() changes dynamically, the plugin is broken.
+ *
+ * This version uses the simple approach and serializes all scheduling
+ * decisions by the use of a queue lock. This is probably not the
+ * best way to do it, but it should suffice for now.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/percpu.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+#include <linux/module.h>
+
+#include <litmus/litmus.h>
+#include <litmus/jobs.h>
+#include <litmus/preempt.h>
+#include <litmus/budget.h>
+#include <litmus/sched_plugin.h>
+#include <litmus/edf_common.h>
+#include <litmus/sched_trace.h>
+
+#include <litmus/clustered.h>
+
+#include <litmus/bheap.h>
+
+#ifdef CONFIG_SCHED_CPU_AFFINITY
+#include <litmus/affinity.h>
+#endif
+
+/* to configure the cluster size */
+#include <litmus/litmus_proc.h>
+#include <linux/uaccess.h>
+
+/* Reference configuration variable. Determines which cache level is used to
+ * group CPUs into clusters. GLOBAL_CLUSTER, which is the default, means that
+ * all CPUs form a single cluster (just like GSN-EDF).
+ */
+static enum cache_level cluster_config = GLOBAL_CLUSTER;
+
+struct clusterdomain;
+
+/* cpu_entry_t - maintain the linked and scheduled state
+ *
+ * A cpu also contains a pointer to the cedf_domain_t cluster
+ * that owns it (struct clusterdomain*)
+ */
+typedef struct {
+	int			cpu;
+	struct clusterdomain*	cluster;	/* owning cluster */
+	struct task_struct*	linked;		/* only RT tasks */
+	struct task_struct*	scheduled;	/* only RT tasks */
+	atomic_t		will_schedule;	/* prevent unneeded IPIs */
+	struct bheap_node*	hn;
+} cpu_entry_t;
+
+/* one cpu_entry_t per CPU */
+DEFINE_PER_CPU(cpu_entry_t, cedf_cpu_entries);
+
+#define set_will_schedule() \
+	(atomic_set(&__get_cpu_var(cedf_cpu_entries).will_schedule, 1))
+#define clear_will_schedule() \
+	(atomic_set(&__get_cpu_var(cedf_cpu_entries).will_schedule, 0))
+#define test_will_schedule(cpu) \
+	(atomic_read(&per_cpu(cedf_cpu_entries, cpu).will_schedule))
+
+/*
+ * In C-EDF there is a cedf domain _per_ cluster.
+ * The number of clusters is determined dynamically according to the
+ * total number of cpus and the cluster size.
+ */
+typedef struct clusterdomain {
+	/* rt_domain for this cluster */
+	rt_domain_t	domain;
+	/* cpus in this cluster */
+	cpu_entry_t*	*cpus;
+	/* map of this cluster's cpus */
+	cpumask_var_t	cpu_map;
+	/* the cpus queue themselves according to priority in here */
+	struct bheap_node *heap_node;
+	struct bheap	cpu_heap;
+	/* lock for this cluster */
+#define cluster_lock domain.ready_lock
+} cedf_domain_t;
+
+/* a cedf_domain per cluster; allocation is done at init/activation time */
+cedf_domain_t *cedf;
+
+#define remote_cluster(cpu)	((cedf_domain_t *) per_cpu(cedf_cpu_entries, cpu).cluster)
+#define task_cpu_cluster(task)	remote_cluster(get_partition(task))
+
+/* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling
+ * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose
+ * information during the initialization of the plugin (e.g., topology)
+#define WANT_ALL_SCHED_EVENTS
+ */
+#define VERBOSE_INIT
+
+static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b)
+{
+	cpu_entry_t *a, *b;
+	a = _a->value;
+	b = _b->value;
+	/* Note that a and b are inverted: we want the lowest-priority CPU at
+	 * the top of the heap.
+	 */
+	return edf_higher_prio(b->linked, a->linked);
+}
+
+/* update_cpu_position - Move the cpu entry to the correct place to maintain
+ *                       order in the cpu queue. Caller must hold cedf lock.
+ */
+static void update_cpu_position(cpu_entry_t *entry)
+{
+	cedf_domain_t *cluster = entry->cluster;
+
+	if (likely(bheap_node_in_heap(entry->hn)))
+		bheap_delete(cpu_lower_prio,
+			     &cluster->cpu_heap,
+			     entry->hn);
+
+	bheap_insert(cpu_lower_prio, &cluster->cpu_heap, entry->hn);
+}
+
+/* caller must hold cedf lock */
+static cpu_entry_t* lowest_prio_cpu(cedf_domain_t *cluster)
+{
+	struct bheap_node* hn;
+	hn = bheap_peek(cpu_lower_prio, &cluster->cpu_heap);
+	return hn->value;
+}
+
+
+/* link_task_to_cpu - Update the link of a CPU.
+ *                    Handles the case where the to-be-linked task is already
+ *                    scheduled on a different CPU.
+ */
+static noinline void link_task_to_cpu(struct task_struct* linked,
+				      cpu_entry_t *entry)
+{
+	cpu_entry_t *sched;
+	struct task_struct* tmp;
+	int on_cpu;
+
+	BUG_ON(linked && !is_realtime(linked));
+
+	/* Currently linked task is set to be unlinked. */
+	if (entry->linked) {
+		entry->linked->rt_param.linked_on = NO_CPU;
+	}
+
+	/* Link new task to CPU. */
+	if (linked) {
+		/* handle task is already scheduled somewhere! */
+		on_cpu = linked->rt_param.scheduled_on;
+		if (on_cpu != NO_CPU) {
+			sched = &per_cpu(cedf_cpu_entries, on_cpu);
+			/* this should only happen if not linked already */
+			BUG_ON(sched->linked == linked);
+
+			/* If we are already scheduled on the CPU to which we
+			 * wanted to link, we don't need to do the swap --
+			 * we just link ourselves to the CPU and depend on
+			 * the caller to get things right.
+			 */
+			if (entry != sched) {
+				TRACE_TASK(linked,
+					   "already scheduled on %d, updating link.\n",
+					   sched->cpu);
+				tmp = sched->linked;
+				linked->rt_param.linked_on = sched->cpu;
+				sched->linked = linked;
+				update_cpu_position(sched);
+				linked = tmp;
+			}
+		}
+		if (linked) /* might be NULL due to swap */
+			linked->rt_param.linked_on = entry->cpu;
+	}
+	entry->linked = linked;
+#ifdef WANT_ALL_SCHED_EVENTS
+	if (linked)
+		TRACE_TASK(linked, "linked to %d.\n", entry->cpu);
+	else
+		TRACE("NULL linked to %d.\n", entry->cpu);
+#endif
+	update_cpu_position(entry);
+}
+
+/* unlink - Make sure a task is not linked any longer to an entry
+ *          where it was linked before. Must hold cedf_lock.
+ */
+static noinline void unlink(struct task_struct* t)
+{
+	cpu_entry_t *entry;
+
+	if (t->rt_param.linked_on != NO_CPU) {
+		/* unlink */
+		entry = &per_cpu(cedf_cpu_entries, t->rt_param.linked_on);
+		t->rt_param.linked_on = NO_CPU;
+		link_task_to_cpu(NULL, entry);
+	} else if (is_queued(t)) {
+		/* This is an interesting situation: t is scheduled,
+		 * but was just recently unlinked. It cannot be
+		 * linked anywhere else (because then it would have
+		 * been relinked to this CPU), thus it must be in some
+		 * queue. We must remove it from the list in this
+		 * case.
+		 *
+		 * In the C-EDF case it should be somewhere in the queue
+		 * for its domain, so we can get the domain via
+		 * task_cpu_cluster().
+		 */
+		remove(&(task_cpu_cluster(t))->domain, t);
+	}
+}
+
+
+/* preempt - force a CPU to reschedule
+ */
+static void preempt(cpu_entry_t *entry)
+{
+	preempt_if_preemptable(entry->scheduled, entry->cpu);
+}
+
+/* requeue - Put an unlinked task into the c-edf domain.
+ *           Caller must hold cedf_lock.
+ */
+static noinline void requeue(struct task_struct* task)
+{
+	cedf_domain_t *cluster = task_cpu_cluster(task);
+	BUG_ON(!task);
+	/* sanity check before insertion */
+	BUG_ON(is_queued(task));
+
+	if (is_early_releasing(task) || is_released(task, litmus_clock()))
+		__add_ready(&cluster->domain, task);
+	else {
+		/* it has got to wait */
+		add_release(&cluster->domain, task);
+	}
+}
+
+#ifdef CONFIG_SCHED_CPU_AFFINITY
+static cpu_entry_t* cedf_get_nearest_available_cpu(
+				cedf_domain_t *cluster, cpu_entry_t *start)
+{
+	cpu_entry_t *affinity;
+
+	get_nearest_available_cpu(affinity, start, cedf_cpu_entries,
+#ifdef CONFIG_RELEASE_MASTER
+				  cluster->domain.release_master
+#else
+				  NO_CPU
+#endif
+				 );
+
+	/* make sure CPU is in our cluster */
+	if (affinity && cpu_isset(affinity->cpu, *cluster->cpu_map))
+		return(affinity);
+	else
+		return(NULL);
+}
+#endif
+
+
+/* check for any necessary preemptions */
+static void check_for_preemptions(cedf_domain_t *cluster)
+{
+	struct task_struct *task;
+	cpu_entry_t *last;
+
+	for(last = lowest_prio_cpu(cluster);
+	    edf_preemption_needed(&cluster->domain, last->linked);
+	    last = lowest_prio_cpu(cluster)) {
+		/* preemption necessary */
+		task = __take_ready(&cluster->domain);
+		TRACE("check_for_preemptions: attempting to link task %d to %d\n",
+		      task->pid, last->cpu);
+#ifdef CONFIG_SCHED_CPU_AFFINITY
+		{
+			cpu_entry_t *affinity =
+				cedf_get_nearest_available_cpu(cluster,
+					&per_cpu(cedf_cpu_entries, task_cpu(task)));
+			if(affinity)
+				last = affinity;
+			else if(requeue_preempted_job(last->linked))
+				requeue(last->linked);
+		}
+#else
+		if (requeue_preempted_job(last->linked))
+			requeue(last->linked);
+#endif
+		link_task_to_cpu(task, last);
+		preempt(last);
+	}
+}
+
+/* cedf_job_arrival: task is either resumed or released */
+static noinline void cedf_job_arrival(struct task_struct* task)
+{
+	cedf_domain_t *cluster = task_cpu_cluster(task);
+	BUG_ON(!task);
+
+	requeue(task);
+	check_for_preemptions(cluster);
+}
+
+static void cedf_release_jobs(rt_domain_t* rt, struct bheap* tasks)
+{
+	cedf_domain_t* cluster = container_of(rt, cedf_domain_t, domain);
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
+
+	__merge_ready(&cluster->domain, tasks);
+	check_for_preemptions(cluster);
+
+	raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
+}
+
+/* caller holds cedf_lock */
+static noinline void job_completion(struct task_struct *t, int forced)
+{
+	BUG_ON(!t);
+
+	sched_trace_task_completion(t, forced);
+
+	TRACE_TASK(t, "job_completion().\n");
+
+	/* set flags */
+	tsk_rt(t)->completed = 0;
+	/* prepare for next period */
+	prepare_for_next_period(t);
+	if (is_early_releasing(t) || is_released(t, litmus_clock()))
+		sched_trace_task_release(t);
+	/* unlink */
+	unlink(t);
+	/* requeue
+	 * But don't requeue a blocking task. */
+	if (is_running(t))
+		cedf_job_arrival(t);
+}
+
+/* cedf_tick - this function is called for every local timer
+ *             interrupt.
+ *
+ *             checks whether the current task has expired and checks
+ *             whether we need to preempt it if it has not expired
+ */
+static void cedf_tick(struct task_struct* t)
+{
+	if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) {
+		if (!is_np(t)) {
+			/* np tasks will be preempted when they become
+			 * preemptable again
+			 */
+			litmus_reschedule_local();
+			set_will_schedule();
+			TRACE("cedf_scheduler_tick: "
+			      "%d is preemptable "
+			      " => FORCE_RESCHED\n", t->pid);
+		} else if (is_user_np(t)) {
+			TRACE("cedf_scheduler_tick: "
+			      "%d is non-preemptable, "
+			      "preemption delayed.\n", t->pid);
+			request_exit_np(t);
+		}
+	}
+}
+
+/* Getting schedule() right is a bit tricky. schedule() may not make any
+ * assumptions on the state of the current task since it may be called for a
+ * number of reasons. The reasons include a scheduler_tick() determining that
+ * it was necessary, because sys_exit_np() was called, because some Linux
+ * subsystem determined so, or even (in the worst case) because there is a bug
+ * hidden somewhere. Thus, we must take extreme care to determine what the
+ * current state is.
+ *
+ * The CPU could currently be scheduling a task (or not) and be linked (or not).
+ *
+ * The following assertions for the scheduled task could hold:
+ *
+ *	- !is_running(scheduled)	// the job blocks
+ *	- scheduled->timeslice == 0	// the job completed (forcefully)
+ *	- is_completed()		// the job completed (by syscall)
+ *	- linked != scheduled		// we need to reschedule (for any reason)
+ *	- is_np(scheduled)		// rescheduling must be delayed,
+ *					   sys_exit_np must be requested
+ *
+ * Any of these can occur together.
+ */
+static struct task_struct* cedf_schedule(struct task_struct * prev)
+{
+	cpu_entry_t* entry = &__get_cpu_var(cedf_cpu_entries);
+	cedf_domain_t *cluster = entry->cluster;
+	int out_of_time, sleep, preempt, np, exists, blocks;
+	struct task_struct* next = NULL;
+
+#ifdef CONFIG_RELEASE_MASTER
+	/* Bail out early if we are the release master.
+	 * The release master never schedules any real-time tasks.
+	 */
+	if (unlikely(cluster->domain.release_master == entry->cpu)) {
+		sched_state_task_picked();
+		return NULL;
+	}
+#endif
+
+	raw_spin_lock(&cluster->cluster_lock);
+	clear_will_schedule();
+
+	/* sanity checking */
+	BUG_ON(entry->scheduled && entry->scheduled != prev);
+	BUG_ON(entry->scheduled && !is_realtime(prev));
+	BUG_ON(is_realtime(prev) && !entry->scheduled);
+
+	/* (0) Determine state */
+	exists      = entry->scheduled != NULL;
+	blocks      = exists && !is_running(entry->scheduled);
+	out_of_time = exists &&
+		      budget_enforced(entry->scheduled) &&
+		      budget_exhausted(entry->scheduled);
+	np          = exists && is_np(entry->scheduled);
+	sleep       = exists && is_completed(entry->scheduled);
+	preempt     = entry->scheduled != entry->linked;
+
+#ifdef WANT_ALL_SCHED_EVENTS
+	TRACE_TASK(prev, "invoked cedf_schedule.\n");
+#endif
+
+	if (exists)
+		TRACE_TASK(prev,
+			   "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d "
+			   "state:%d sig:%d\n",
+			   blocks, out_of_time, np, sleep, preempt,
+			   prev->state, signal_pending(prev));
+	if (entry->linked && preempt)
+		TRACE_TASK(prev, "will be preempted by %s/%d\n",
+			   entry->linked->comm, entry->linked->pid);
+
+
+	/* If a task blocks we have no choice but to reschedule.
+	 */
+	if (blocks)
+		unlink(entry->scheduled);
+
+	/* Request a sys_exit_np() call if we would like to preempt but cannot.
+	 * We need to make sure to update the link structure anyway in case
+	 * that we are still linked. Multiple calls to request_exit_np() don't
+	 * hurt.
+	 */
+	if (np && (out_of_time || preempt || sleep)) {
+		unlink(entry->scheduled);
+		request_exit_np(entry->scheduled);
+	}
+
+	/* Any task that is preemptable and either exhausts its execution
+	 * budget or wants to sleep completes. We may have to reschedule after
+	 * this. Don't do a job completion if we block (can't have timers running
+	 * for blocked jobs).
+	 */
+	if (!np && (out_of_time || sleep) && !blocks)
+		job_completion(entry->scheduled, !sleep);
+
+	/* Link pending task if we became unlinked.
+	 */
+	if (!entry->linked)
+		link_task_to_cpu(__take_ready(&cluster->domain), entry);
+
+	/* The final scheduling decision. Do we need to switch for some reason?
+	 * If linked is different from scheduled, then select linked as next.
+	 */
+	if ((!np || blocks) &&
+	    entry->linked != entry->scheduled) {
+		/* Schedule a linked job? */
+		if (entry->linked) {
+			entry->linked->rt_param.scheduled_on = entry->cpu;
+			next = entry->linked;
+		}
+		if (entry->scheduled) {
+			/* not gonna be scheduled soon */
+			entry->scheduled->rt_param.scheduled_on = NO_CPU;
+			TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
+		}
+	} else
+		/* Only override Linux scheduler if we have a real-time task
+		 * scheduled that needs to continue.
+		 */
+		if (exists)
+			next = prev;
+
+	sched_state_task_picked();
+	raw_spin_unlock(&cluster->cluster_lock);
+
+#ifdef WANT_ALL_SCHED_EVENTS
+	TRACE("cedf_lock released, next=0x%p\n", next);
+
+	if (next)
+		TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
+	else if (exists && !next)
+		TRACE("becomes idle at %llu.\n", litmus_clock());
+#endif
+
+
+	return next;
+}
+
+
+/* _finish_switch - we just finished the switch away from prev
+ */
+static void cedf_finish_switch(struct task_struct *prev)
+{
+	cpu_entry_t* entry = &__get_cpu_var(cedf_cpu_entries);
+
+	entry->scheduled = is_realtime(current) ? current : NULL;
+#ifdef WANT_ALL_SCHED_EVENTS
+	TRACE_TASK(prev, "switched away from\n");
+#endif
+}
+
+
+/* Prepare a task for running in RT mode
+ */
+static void cedf_task_new(struct task_struct * t, int on_rq, int is_scheduled)
+{
+	unsigned long	flags;
+	cpu_entry_t*	entry;
+	cedf_domain_t*	cluster;
+
+	TRACE("C-EDF: task new %d\n", t->pid);
+
+	/* the cluster doesn't change even if t is scheduled */
+	cluster = task_cpu_cluster(t);
+
+	raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
+
+	/* setup job params */
+	release_at(t, litmus_clock());
+
+	if (is_scheduled) {
+		entry = &per_cpu(cedf_cpu_entries, task_cpu(t));
+		BUG_ON(entry->scheduled);
+
+#ifdef CONFIG_RELEASE_MASTER
+		if (entry->cpu != cluster->domain.release_master) {
+#endif
+			entry->scheduled = t;
+			tsk_rt(t)->scheduled_on = task_cpu(t);
+#ifdef CONFIG_RELEASE_MASTER
+		} else {
+			/* do not schedule on release master */
+			preempt(entry); /* force resched */
+			tsk_rt(t)->scheduled_on = NO_CPU;
+		}
+#endif
+	} else {
+		t->rt_param.scheduled_on = NO_CPU;
+	}
+	t->rt_param.linked_on = NO_CPU;
+
+	if (is_running(t))
+		cedf_job_arrival(t);
+	raw_spin_unlock_irqrestore(&(cluster->cluster_lock), flags);
+}
+
+static void cedf_task_wake_up(struct task_struct *task)
+{
+	unsigned long flags;
+	lt_t now;
+	cedf_domain_t *cluster;
+
+	TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
+
+	cluster = task_cpu_cluster(task);
+
+	raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
+	now = litmus_clock();
+	if (is_sporadic(task) && is_tardy(task, now)) {
+		/* new sporadic release */
+		release_at(task, now);
+		sched_trace_task_release(task);
+	}
+	cedf_job_arrival(task);
+	raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
+}
+
+static void cedf_task_block(struct task_struct *t)
+{
+	unsigned long flags;
+	cedf_domain_t *cluster;
+
+	TRACE_TASK(t, "block at %llu\n", litmus_clock());
+
+	cluster = task_cpu_cluster(t);
+
+	/* unlink if necessary */
+	raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
+	unlink(t);
+	raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
+
+	BUG_ON(!is_realtime(t));
+}
+
+
+static void cedf_task_exit(struct task_struct * t)
+{
+	unsigned long flags;
+	cedf_domain_t *cluster = task_cpu_cluster(t);
+
+	/* unlink if necessary */
+	raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
+	unlink(t);
+	if (tsk_rt(t)->scheduled_on != NO_CPU) {
+		cpu_entry_t *cpu;
+		cpu = &per_cpu(cedf_cpu_entries, tsk_rt(t)->scheduled_on);
+		cpu->scheduled = NULL;
+		tsk_rt(t)->scheduled_on = NO_CPU;
+	}
+	raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
+
+	BUG_ON(!is_realtime(t));
+	TRACE_TASK(t, "RIP\n");
+}
+
+static long cedf_admit_task(struct task_struct* tsk)
+{
+	return (remote_cluster(task_cpu(tsk)) == task_cpu_cluster(tsk)) ?
+			0 : -EINVAL;
+}
+
+/* total number of clusters */
+static int num_clusters;
+/* we do not support clusters of different sizes */
+static unsigned int cluster_size;
+
+#ifdef VERBOSE_INIT
+static void print_cluster_topology(cpumask_var_t mask, int cpu)
+{
+	int chk;
+	char buf[255];
+
+	chk = cpulist_scnprintf(buf, 254, mask);
+	buf[chk] = '\0';
+	printk(KERN_INFO "CPU = %d, shared cpu(s) = %s\n", cpu, buf);
+
+}
+#endif
+
+static int clusters_allocated = 0;
+
+static void cleanup_cedf(void)
+{
+	int i;
+
+	if (clusters_allocated) {
+		for (i = 0; i < num_clusters; i++) {
+			kfree(cedf[i].cpus);
+			kfree(cedf[i].heap_node);
+			free_cpumask_var(cedf[i].cpu_map);
+		}
+
+		kfree(cedf);
+	}
+}
+
+static long cedf_activate_plugin(void)
+{
+	int i, j, cpu, ccpu, cpu_count;
+	cpu_entry_t *entry;
+
+	cpumask_var_t mask;
+	int chk = 0;
+
+	/* de-allocate old clusters, if any */
+	cleanup_cedf();
+
+	printk(KERN_INFO "C-EDF: Activate Plugin, cluster configuration = %d\n",
+	       cluster_config);
+
+	/* need to get cluster_size first */
+	if(!zalloc_cpumask_var(&mask, GFP_ATOMIC))
+		return -ENOMEM;
+
+	if (unlikely(cluster_config == GLOBAL_CLUSTER)) {
+		cluster_size = num_online_cpus();
+	} else {
+		chk = get_shared_cpu_map(mask, 0, cluster_config);
+		if (chk) {
+			/* if chk != 0 then it is the max allowed index */
+			printk(KERN_INFO "C-EDF: Cluster configuration = %d "
+			       "is not supported on this hardware.\n",
+			       cluster_config);
+			/* User should notice that the configuration failed, so
+			 * let's bail out. */
+			return -EINVAL;
+		}
+
+		cluster_size = cpumask_weight(mask);
+	}
+
+	if ((num_online_cpus() % cluster_size) != 0) {
+		/* this can't be right, some cpus are left out */
+		printk(KERN_ERR "C-EDF: Trying to group %d cpus in %d!\n",
+		       num_online_cpus(), cluster_size);
+		return -1;
+	}
+
+	num_clusters = num_online_cpus() / cluster_size;
+	printk(KERN_INFO "C-EDF: %d cluster(s) of size = %d\n",
+	       num_clusters, cluster_size);
+
+	/* initialize clusters */
+	cedf = kmalloc(num_clusters * sizeof(cedf_domain_t), GFP_ATOMIC);
+	for (i = 0; i < num_clusters; i++) {
+
+		cedf[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t),
+				       GFP_ATOMIC);
+		cedf[i].heap_node = kmalloc(
+				cluster_size * sizeof(struct bheap_node),
+				GFP_ATOMIC);
+		bheap_init(&(cedf[i].cpu_heap));
+		edf_domain_init(&(cedf[i].domain), NULL, cedf_release_jobs);
+
+		if(!zalloc_cpumask_var(&cedf[i].cpu_map, GFP_ATOMIC))
+			return -ENOMEM;
+#ifdef CONFIG_RELEASE_MASTER
+		cedf[i].domain.release_master = atomic_read(&release_master_cpu);
+#endif
+	}
+
+	/* cycle through clusters and add cpus to them */
+	for (i = 0; i < num_clusters; i++) {
+
+		for_each_online_cpu(cpu) {
+			/* check if the cpu is already in a cluster */
+			for (j = 0; j < num_clusters; j++)
+				if (cpumask_test_cpu(cpu, cedf[j].cpu_map))
+					break;
+			/* if it is in a cluster go to next cpu */
+			if (j < num_clusters &&
+			    cpumask_test_cpu(cpu, cedf[j].cpu_map))
+				continue;
+
+			/* this cpu isn't in any cluster */
+			/* get the shared cpus */
+			if (unlikely(cluster_config == GLOBAL_CLUSTER))
+				cpumask_copy(mask, cpu_online_mask);
+			else
+				get_shared_cpu_map(mask, cpu, cluster_config);
+
+			cpumask_copy(cedf[i].cpu_map, mask);
+#ifdef VERBOSE_INIT
+			print_cluster_topology(mask, cpu);
+#endif
+			/* add cpus to current cluster and init cpu_entry_t */
+			cpu_count = 0;
+			for_each_cpu(ccpu, cedf[i].cpu_map) {
+
+				entry = &per_cpu(cedf_cpu_entries, ccpu);
+				cedf[i].cpus[cpu_count] = entry;
+				atomic_set(&entry->will_schedule, 0);
+				entry->cpu = ccpu;
+				entry->cluster = &cedf[i];
+				entry->hn = &(cedf[i].heap_node[cpu_count]);
+				bheap_node_init(&entry->hn, entry);
+
+				cpu_count++;
+
+				entry->linked = NULL;
+				entry->scheduled = NULL;
+#ifdef CONFIG_RELEASE_MASTER
+				/* only add CPUs that should schedule jobs */
+				if (entry->cpu != entry->cluster->domain.release_master)
+#endif
+					update_cpu_position(entry);
+			}
+			/* done with this cluster */
+			break;
+		}
+	}
+
+	free_cpumask_var(mask);
+	clusters_allocated = 1;
+	return 0;
+}
+
+/* Plugin object */
+static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = {
+	.plugin_name		= "C-EDF",
+	.finish_switch		= cedf_finish_switch,
+	.tick			= cedf_tick,
+	.task_new		= cedf_task_new,
+	.complete_job		= complete_job,
+	.task_exit		= cedf_task_exit,
+	.schedule		= cedf_schedule,
+	.task_wake_up		= cedf_task_wake_up,
+	.task_block		= cedf_task_block,
+	.admit_task		= cedf_admit_task,
+	.activate_plugin	= cedf_activate_plugin,
+};
+
+static struct proc_dir_entry *cluster_file = NULL, *cedf_dir = NULL;
+
+static int __init init_cedf(void)
+{
+	int err, fs;
+
+	err = register_sched_plugin(&cedf_plugin);
+	if (!err) {
+		fs = make_plugin_proc_dir(&cedf_plugin, &cedf_dir);
+		if (!fs)
+			cluster_file = create_cluster_file(cedf_dir, &cluster_config);
+		else
+			printk(KERN_ERR "Could not allocate C-EDF procfs dir.\n");
+	}
+	return err;
+}
+
+static void clean_cedf(void)
+{
+	cleanup_cedf();
+	if (cluster_file)
+		remove_proc_entry("cluster", cedf_dir);
+	if (cedf_dir)
+		remove_plugin_proc_dir(&cedf_plugin);
+}
+
+module_init(init_cedf);
+module_exit(clean_cedf);