aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndrea Bastoni <bastoni@cs.unc.edu>2010-05-28 10:51:01 -0400
committerAndrea Bastoni <bastoni@cs.unc.edu>2010-05-29 17:43:59 -0400
commit7c1ff4c544dd650cceff3cd69a04bcba60856678 (patch)
tree76d2dee2a96363f283b9440d46d1ed8be4fa3aff
parent425a6b5043bcc2142804107c853f978ac2fe3040 (diff)
Add C-EDF Plugin2010.1
Improved C-EDF plugin. C-EDF now supports different cluster sizes (based on L2 and L3 cache sharing) and supports dynamic changes of cluster size (this requires reloading the plugin).
-rw-r--r--include/litmus/sched_plugin.h3
-rw-r--r--litmus/Makefile1
-rw-r--r--litmus/litmus.c64
-rw-r--r--litmus/sched_cedf.c756
-rw-r--r--litmus/sched_plugin.c8
5 files changed, 832 insertions, 0 deletions
diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h
index 2d856d587041..9c1c9f28ba79 100644
--- a/include/litmus/sched_plugin.h
+++ b/include/litmus/sched_plugin.h
@@ -133,6 +133,9 @@ struct sched_plugin {
133 133
134extern struct sched_plugin *litmus; 134extern struct sched_plugin *litmus;
135 135
136/* cluster size: cache_index = 2 L2, cache_index = 3 L3 */
137extern int cluster_cache_index;
138
136int register_sched_plugin(struct sched_plugin* plugin); 139int register_sched_plugin(struct sched_plugin* plugin);
137struct sched_plugin* find_sched_plugin(const char* name); 140struct sched_plugin* find_sched_plugin(const char* name);
138int print_sched_plugins(char* buf, int max); 141int print_sched_plugins(char* buf, int max);
diff --git a/litmus/Makefile b/litmus/Makefile
index ff4eb8a7b6c4..0cc33e8bee51 100644
--- a/litmus/Makefile
+++ b/litmus/Makefile
@@ -14,6 +14,7 @@ obj-y = sched_plugin.o litmus.o \
14 ctrldev.o \ 14 ctrldev.o \
15 sched_gsn_edf.o \ 15 sched_gsn_edf.o \
16 sched_psn_edf.o \ 16 sched_psn_edf.o \
17 sched_cedf.o \
17 sched_pfair.o 18 sched_pfair.o
18 19
19obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o 20obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o
diff --git a/litmus/litmus.c b/litmus/litmus.c
index 3ef2df8ffb50..e43596a5104c 100644
--- a/litmus/litmus.c
+++ b/litmus/litmus.c
@@ -566,6 +566,55 @@ static int proc_write_curr(struct file *file,
566 return len; 566 return len;
567} 567}
568 568
569static int proc_read_cluster_size(char *page, char **start,
570 off_t off, int count,
571 int *eof, void *data)
572{
573 int len;
574 if (cluster_cache_index == 2)
575 len = snprintf(page, PAGE_SIZE, "L2\n");
576 else if (cluster_cache_index == 3)
577 len = snprintf(page, PAGE_SIZE, "L3\n");
578 else /* (cluster_cache_index == 1) */
579 len = snprintf(page, PAGE_SIZE, "L1\n");
580
581 return len;
582}
583
584static int proc_write_cluster_size(struct file *file,
585 const char *buffer,
586 unsigned long count,
587 void *data)
588{
589 int len;
590 /* L2, L3 */
591 char cache_name[33];
592
593 if(count > 32)
594 len = 32;
595 else
596 len = count;
597
598 if(copy_from_user(cache_name, buffer, len))
599 return -EFAULT;
600
601 cache_name[len] = '\0';
602 /* chomp name */
603 if (len > 1 && cache_name[len - 1] == '\n')
604 cache_name[len - 1] = '\0';
605
606 /* do a quick and dirty comparison to find the cluster size */
607 if (!strcmp(cache_name, "L2"))
608 cluster_cache_index = 2;
609 else if (!strcmp(cache_name, "L3"))
610 cluster_cache_index = 3;
611 else if (!strcmp(cache_name, "L1"))
612 cluster_cache_index = 1;
613 else
614 printk(KERN_INFO "Cluster '%s' is unknown.\n", cache_name);
615
616 return len;
617}
569 618
570static int proc_read_release_master(char *page, char **start, 619static int proc_read_release_master(char *page, char **start,
571 off_t off, int count, 620 off_t off, int count,
@@ -621,6 +670,7 @@ static struct proc_dir_entry *litmus_dir = NULL,
621 *curr_file = NULL, 670 *curr_file = NULL,
622 *stat_file = NULL, 671 *stat_file = NULL,
623 *plugs_file = NULL, 672 *plugs_file = NULL,
673 *clus_cache_idx_file = NULL,
624 *release_master_file = NULL; 674 *release_master_file = NULL;
625 675
626static int __init init_litmus_proc(void) 676static int __init init_litmus_proc(void)
@@ -651,6 +701,16 @@ static int __init init_litmus_proc(void)
651 release_master_file->read_proc = proc_read_release_master; 701 release_master_file->read_proc = proc_read_release_master;
652 release_master_file->write_proc = proc_write_release_master; 702 release_master_file->write_proc = proc_write_release_master;
653 703
704 clus_cache_idx_file = create_proc_entry("cluster_cache",
705 0644, litmus_dir);
706 if (!clus_cache_idx_file) {
707 printk(KERN_ERR "Could not allocate cluster_cache "
708 "procfs entry.\n");
709 return -ENOMEM;
710 }
711 clus_cache_idx_file->read_proc = proc_read_cluster_size;
712 clus_cache_idx_file->write_proc = proc_write_cluster_size;
713
654 stat_file = create_proc_read_entry("stats", 0444, litmus_dir, 714 stat_file = create_proc_read_entry("stats", 0444, litmus_dir,
655 proc_read_stats, NULL); 715 proc_read_stats, NULL);
656 716
@@ -668,6 +728,10 @@ static void exit_litmus_proc(void)
668 remove_proc_entry("stats", litmus_dir); 728 remove_proc_entry("stats", litmus_dir);
669 if (curr_file) 729 if (curr_file)
670 remove_proc_entry("active_plugin", litmus_dir); 730 remove_proc_entry("active_plugin", litmus_dir);
731 if (clus_cache_idx_file)
732 remove_proc_entry("cluster_cache", litmus_dir);
733 if (release_master_file)
734 remove_proc_entry("release_master", litmus_dir);
671 if (litmus_dir) 735 if (litmus_dir)
672 remove_proc_entry("litmus", NULL); 736 remove_proc_entry("litmus", NULL);
673} 737}
diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c
new file mode 100644
index 000000000000..da44b451c9ad
--- /dev/null
+++ b/litmus/sched_cedf.c
@@ -0,0 +1,756 @@
1/*
2 * litmus/sched_cedf.c
3 *
4 * Implementation of the C-EDF scheduling algorithm.
5 *
6 * This implementation is based on G-EDF:
7 * - CPUs are clustered around L2 or L3 caches.
8 * - Cluster topology is automatically detected (this is arch dependent
9 * and is working only on x86 at the moment --- and only with modern
10 * cpus that export cpuid4 information)
11 * - The plugin _does not_ attempt to put tasks in the right cluster, i.e.,
12 * the programmer needs to be aware of the topology to place tasks
13 * in the desired cluster
14 * - default clustering is around L2 cache (cache index = 2)
15 * supported clusters are: L1 (private cache: pedf), L2, L3
16 *
17 * For details on functions, take a look at sched_gsn_edf.c
18 *
19 * This version uses the simple approach and serializes all scheduling
20 * decisions by the use of a queue lock. This is probably not the
21 * best way to do it, but it should suffice for now.
22 */
23
24#include <linux/spinlock.h>
25#include <linux/percpu.h>
26#include <linux/sched.h>
27
28#include <litmus/litmus.h>
29#include <litmus/jobs.h>
30#include <litmus/sched_plugin.h>
31#include <litmus/edf_common.h>
32#include <litmus/sched_trace.h>
33
34#include <litmus/bheap.h>
35
36#include <linux/module.h>
37
38/* forward declaration... a funny thing with C ;) */
39struct clusterdomain;
40
41/* cpu_entry_t - maintain the linked and scheduled state
42 *
43 * A cpu also contains a pointer to the cedf_domain_t cluster
44 * that owns it (struct clusterdomain*)
45 */
46typedef struct {
47 int cpu;
48 struct clusterdomain* cluster; /* owning cluster */
49 struct task_struct* linked; /* only RT tasks */
50 struct task_struct* scheduled; /* only RT tasks */
51 atomic_t will_schedule; /* prevent unneeded IPIs */
52 struct bheap_node* hn;
53} cpu_entry_t;
54
/* Per-CPU storage: exactly one cpu_entry_t for every CPU. */
DEFINE_PER_CPU(cpu_entry_t, cedf_cpu_entries);

/*
 * Helpers for the will_schedule flag.
 * set/clear operate on the entry of the CPU executing the macro;
 * test reads the flag of an arbitrary CPU.
 */
#define set_will_schedule() \
	(atomic_set(&__get_cpu_var(cedf_cpu_entries).will_schedule, 1))
#define clear_will_schedule() \
	(atomic_set(&__get_cpu_var(cedf_cpu_entries).will_schedule, 0))
#define test_will_schedule(cpu) \
	(atomic_read(&per_cpu(cedf_cpu_entries, cpu).will_schedule))
64
65/*
66 * In C-EDF there is a cedf domain _per_ cluster
67 * The number of clusters is dynamically determined accordingly to the
68 * total cpu number and the cluster size
69 */
70typedef struct clusterdomain {
71 /* rt_domain for this cluster */
72 rt_domain_t domain;
73 /* cpus in this cluster */
74 cpu_entry_t* *cpus;
75 /* map of this cluster cpus */
76 cpumask_var_t cpu_map;
77 /* the cpus queue themselves according to priority in here */
78 struct bheap_node *heap_node;
79 struct bheap cpu_heap;
80 /* lock for this cluster */
81#define lock domain.ready_lock
82} cedf_domain_t;
83
84/* a cedf_domain per cluster; allocation is done at init/activation time */
85cedf_domain_t *cedf;
86
87#define remote_cluster(cpu) ((cedf_domain_t *) per_cpu(cedf_cpu_entries, cpu).cluster)
88#define task_cpu_cluster(task) remote_cluster(get_partition(task))
89
90/* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling
91 * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose
92 * information during the initialization of the plugin (e.g., topology)
93#define WANT_ALL_SCHED_EVENTS
94 */
95#define VERBOSE_INIT
96
97static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b)
98{
99 cpu_entry_t *a, *b;
100 a = _a->value;
101 b = _b->value;
102 /* Note that a and b are inverted: we want the lowest-priority CPU at
103 * the top of the heap.
104 */
105 return edf_higher_prio(b->linked, a->linked);
106}
107
108/* update_cpu_position - Move the cpu entry to the correct place to maintain
109 * order in the cpu queue. Caller must hold cedf lock.
110 */
111static void update_cpu_position(cpu_entry_t *entry)
112{
113 cedf_domain_t *cluster = entry->cluster;
114
115 if (likely(bheap_node_in_heap(entry->hn)))
116 bheap_delete(cpu_lower_prio,
117 &cluster->cpu_heap,
118 entry->hn);
119
120 bheap_insert(cpu_lower_prio, &cluster->cpu_heap, entry->hn);
121}
122
123/* caller must hold cedf lock */
124static cpu_entry_t* lowest_prio_cpu(cedf_domain_t *cluster)
125{
126 struct bheap_node* hn;
127 hn = bheap_peek(cpu_lower_prio, &cluster->cpu_heap);
128 return hn->value;
129}
130
131
132/* link_task_to_cpu - Update the link of a CPU.
133 * Handles the case where the to-be-linked task is already
134 * scheduled on a different CPU.
135 */
136static noinline void link_task_to_cpu(struct task_struct* linked,
137 cpu_entry_t *entry)
138{
139 cpu_entry_t *sched;
140 struct task_struct* tmp;
141 int on_cpu;
142
143 BUG_ON(linked && !is_realtime(linked));
144
145 /* Currently linked task is set to be unlinked. */
146 if (entry->linked) {
147 entry->linked->rt_param.linked_on = NO_CPU;
148 }
149
150 /* Link new task to CPU. */
151 if (linked) {
152 set_rt_flags(linked, RT_F_RUNNING);
153 /* handle task is already scheduled somewhere! */
154 on_cpu = linked->rt_param.scheduled_on;
155 if (on_cpu != NO_CPU) {
156 sched = &per_cpu(cedf_cpu_entries, on_cpu);
157 /* this should only happen if not linked already */
158 BUG_ON(sched->linked == linked);
159
160 /* If we are already scheduled on the CPU to which we
161 * wanted to link, we don't need to do the swap --
162 * we just link ourselves to the CPU and depend on
163 * the caller to get things right.
164 */
165 if (entry != sched) {
166 TRACE_TASK(linked,
167 "already scheduled on %d, updating link.\n",
168 sched->cpu);
169 tmp = sched->linked;
170 linked->rt_param.linked_on = sched->cpu;
171 sched->linked = linked;
172 update_cpu_position(sched);
173 linked = tmp;
174 }
175 }
176 if (linked) /* might be NULL due to swap */
177 linked->rt_param.linked_on = entry->cpu;
178 }
179 entry->linked = linked;
180#ifdef WANT_ALL_SCHED_EVENTS
181 if (linked)
182 TRACE_TASK(linked, "linked to %d.\n", entry->cpu);
183 else
184 TRACE("NULL linked to %d.\n", entry->cpu);
185#endif
186 update_cpu_position(entry);
187}
188
189/* unlink - Make sure a task is not linked any longer to an entry
190 * where it was linked before. Must hold cedf_lock.
191 */
192static noinline void unlink(struct task_struct* t)
193{
194 cpu_entry_t *entry;
195
196 if (unlikely(!t)) {
197 TRACE_BUG_ON(!t);
198 return;
199 }
200
201
202 if (t->rt_param.linked_on != NO_CPU) {
203 /* unlink */
204 entry = &per_cpu(cedf_cpu_entries, t->rt_param.linked_on);
205 t->rt_param.linked_on = NO_CPU;
206 link_task_to_cpu(NULL, entry);
207 } else if (is_queued(t)) {
208 /* This is an interesting situation: t is scheduled,
209 * but was just recently unlinked. It cannot be
210 * linked anywhere else (because then it would have
211 * been relinked to this CPU), thus it must be in some
212 * queue. We must remove it from the list in this
213 * case.
214 *
215 * in C-EDF case is should be somewhere in the queue for
216 * its domain, therefore and we can get the domain using
217 * task_cpu_cluster
218 */
219 remove(&(task_cpu_cluster(t))->domain, t);
220 }
221}
222
223
224/* preempt - force a CPU to reschedule
225 */
226static void preempt(cpu_entry_t *entry)
227{
228 preempt_if_preemptable(entry->scheduled, entry->cpu);
229}
230
231/* requeue - Put an unlinked task into gsn-edf domain.
232 * Caller must hold cedf_lock.
233 */
234static noinline void requeue(struct task_struct* task)
235{
236 cedf_domain_t *cluster = task_cpu_cluster(task);
237 BUG_ON(!task);
238 /* sanity check before insertion */
239 BUG_ON(is_queued(task));
240
241 if (is_released(task, litmus_clock()))
242 __add_ready(&cluster->domain, task);
243 else {
244 /* it has got to wait */
245 add_release(&cluster->domain, task);
246 }
247}
248
249/* check for any necessary preemptions */
250static void check_for_preemptions(cedf_domain_t *cluster)
251{
252 struct task_struct *task;
253 cpu_entry_t* last;
254
255 for(last = lowest_prio_cpu(cluster);
256 edf_preemption_needed(&cluster->domain, last->linked);
257 last = lowest_prio_cpu(cluster)) {
258 /* preemption necessary */
259 task = __take_ready(&cluster->domain);
260 TRACE("check_for_preemptions: attempting to link task %d to %d\n",
261 task->pid, last->cpu);
262 if (last->linked)
263 requeue(last->linked);
264 link_task_to_cpu(task, last);
265 preempt(last);
266 }
267}
268
269/* cedf_job_arrival: task is either resumed or released */
270static noinline void cedf_job_arrival(struct task_struct* task)
271{
272 cedf_domain_t *cluster = task_cpu_cluster(task);
273 BUG_ON(!task);
274
275 requeue(task);
276 check_for_preemptions(cluster);
277}
278
279static void cedf_release_jobs(rt_domain_t* rt, struct bheap* tasks)
280{
281 cedf_domain_t* cluster = container_of(rt, cedf_domain_t, domain);
282 unsigned long flags;
283
284 spin_lock_irqsave(&cluster->lock, flags);
285
286 __merge_ready(&cluster->domain, tasks);
287 check_for_preemptions(cluster);
288
289 spin_unlock_irqrestore(&cluster->lock, flags);
290}
291
292/* caller holds cedf_lock */
293static noinline void job_completion(struct task_struct *t, int forced)
294{
295 BUG_ON(!t);
296
297 sched_trace_task_completion(t, forced);
298
299 TRACE_TASK(t, "job_completion().\n");
300
301 /* set flags */
302 set_rt_flags(t, RT_F_SLEEP);
303 /* prepare for next period */
304 prepare_for_next_period(t);
305 if (is_released(t, litmus_clock()))
306 sched_trace_task_release(t);
307 /* unlink */
308 unlink(t);
309 /* requeue
310 * But don't requeue a blocking task. */
311 if (is_running(t))
312 cedf_job_arrival(t);
313}
314
315/* cedf_tick - this function is called for every local timer
316 * interrupt.
317 *
318 * checks whether the current task has expired and checks
319 * whether we need to preempt it if it has not expired
320 */
321static void cedf_tick(struct task_struct* t)
322{
323 if (is_realtime(t) && budget_exhausted(t)) {
324 if (!is_np(t)) {
325 /* np tasks will be preempted when they become
326 * preemptable again
327 */
328 set_tsk_need_resched(t);
329 set_will_schedule();
330 TRACE("cedf_scheduler_tick: "
331 "%d is preemptable "
332 " => FORCE_RESCHED\n", t->pid);
333 } else if (is_user_np(t)) {
334 TRACE("cedf_scheduler_tick: "
335 "%d is non-preemptable, "
336 "preemption delayed.\n", t->pid);
337 request_exit_np(t);
338 }
339 }
340}
341
342/* Getting schedule() right is a bit tricky. schedule() may not make any
343 * assumptions on the state of the current task since it may be called for a
344 * number of reasons. The reasons include a scheduler_tick() determined that it
345 * was necessary, because sys_exit_np() was called, because some Linux
346 * subsystem determined so, or even (in the worst case) because there is a bug
347 * hidden somewhere. Thus, we must take extreme care to determine what the
348 * current state is.
349 *
350 * The CPU could currently be scheduling a task (or not), be linked (or not).
351 *
352 * The following assertions for the scheduled task could hold:
353 *
354 * - !is_running(scheduled) // the job blocks
355 * - scheduled->timeslice == 0 // the job completed (forcefully)
356 * - get_rt_flag() == RT_F_SLEEP // the job completed (by syscall)
357 * - linked != scheduled // we need to reschedule (for any reason)
358 * - is_np(scheduled) // rescheduling must be delayed,
359 * sys_exit_np must be requested
360 *
361 * Any of these can occur together.
362 */
363static struct task_struct* cedf_schedule(struct task_struct * prev)
364{
365 cpu_entry_t* entry = &__get_cpu_var(cedf_cpu_entries);
366 cedf_domain_t *cluster = entry->cluster;
367 int out_of_time, sleep, preempt, np, exists, blocks;
368 struct task_struct* next = NULL;
369
370 spin_lock(&cluster->lock);
371 clear_will_schedule();
372
373 /* sanity checking */
374 BUG_ON(entry->scheduled && entry->scheduled != prev);
375 BUG_ON(entry->scheduled && !is_realtime(prev));
376 BUG_ON(is_realtime(prev) && !entry->scheduled);
377
378 /* (0) Determine state */
379 exists = entry->scheduled != NULL;
380 blocks = exists && !is_running(entry->scheduled);
381 out_of_time = exists && budget_exhausted(entry->scheduled);
382 np = exists && is_np(entry->scheduled);
383 sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP;
384 preempt = entry->scheduled != entry->linked;
385
386#ifdef WANT_ALL_SCHED_EVENTS
387 TRACE_TASK(prev, "invoked cedf_schedule.\n");
388#endif
389
390 if (exists)
391 TRACE_TASK(prev,
392 "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d "
393 "state:%d sig:%d\n",
394 blocks, out_of_time, np, sleep, preempt,
395 prev->state, signal_pending(prev));
396 if (entry->linked && preempt)
397 TRACE_TASK(prev, "will be preempted by %s/%d\n",
398 entry->linked->comm, entry->linked->pid);
399
400
401 /* If a task blocks we have no choice but to reschedule.
402 */
403 if (blocks)
404 unlink(entry->scheduled);
405
406 /* Request a sys_exit_np() call if we would like to preempt but cannot.
407 * We need to make sure to update the link structure anyway in case
408 * that we are still linked. Multiple calls to request_exit_np() don't
409 * hurt.
410 */
411 if (np && (out_of_time || preempt || sleep)) {
412 unlink(entry->scheduled);
413 request_exit_np(entry->scheduled);
414 }
415
416 /* Any task that is preemptable and either exhausts its execution
417 * budget or wants to sleep completes. We may have to reschedule after
418 * this. Don't do a job completion if we block (can't have timers running
419 * for blocked jobs). Preemption go first for the same reason.
420 */
421 if (!np && (out_of_time || sleep) && !blocks && !preempt)
422 job_completion(entry->scheduled, !sleep);
423
424 /* Link pending task if we became unlinked.
425 */
426 if (!entry->linked)
427 link_task_to_cpu(__take_ready(&cluster->domain), entry);
428
429 /* The final scheduling decision. Do we need to switch for some reason?
430 * If linked is different from scheduled, then select linked as next.
431 */
432 if ((!np || blocks) &&
433 entry->linked != entry->scheduled) {
434 /* Schedule a linked job? */
435 if (entry->linked) {
436 entry->linked->rt_param.scheduled_on = entry->cpu;
437 next = entry->linked;
438 }
439 if (entry->scheduled) {
440 /* not gonna be scheduled soon */
441 entry->scheduled->rt_param.scheduled_on = NO_CPU;
442 TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
443 }
444 } else
445 /* Only override Linux scheduler if we have a real-time task
446 * scheduled that needs to continue.
447 */
448 if (exists)
449 next = prev;
450
451 spin_unlock(&cluster->lock);
452
453#ifdef WANT_ALL_SCHED_EVENTS
454 TRACE("cedf_lock released, next=0x%p\n", next);
455
456 if (next)
457 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
458 else if (exists && !next)
459 TRACE("becomes idle at %llu.\n", litmus_clock());
460#endif
461
462
463 return next;
464}
465
466
467/* _finish_switch - we just finished the switch away from prev
468 */
469static void cedf_finish_switch(struct task_struct *prev)
470{
471 cpu_entry_t* entry = &__get_cpu_var(cedf_cpu_entries);
472
473 entry->scheduled = is_realtime(current) ? current : NULL;
474#ifdef WANT_ALL_SCHED_EVENTS
475 TRACE_TASK(prev, "switched away from\n");
476#endif
477}
478
479
480/* Prepare a task for running in RT mode
481 */
482static void cedf_task_new(struct task_struct * t, int on_rq, int running)
483{
484 unsigned long flags;
485 cpu_entry_t* entry;
486 cedf_domain_t* cluster;
487
488 TRACE("gsn edf: task new %d\n", t->pid);
489
490 /* the cluster doesn't change even if t is running */
491 cluster = task_cpu_cluster(t);
492
493 spin_lock_irqsave(&cluster->domain.ready_lock, flags);
494
495 /* setup job params */
496 release_at(t, litmus_clock());
497
498 if (running) {
499 entry = &per_cpu(cedf_cpu_entries, task_cpu(t));
500 BUG_ON(entry->scheduled);
501
502 entry->scheduled = t;
503 tsk_rt(t)->scheduled_on = task_cpu(t);
504 } else {
505 t->rt_param.scheduled_on = NO_CPU;
506 }
507 t->rt_param.linked_on = NO_CPU;
508
509 cedf_job_arrival(t);
510 spin_unlock_irqrestore(&(cluster->domain.ready_lock), flags);
511}
512
513static void cedf_task_wake_up(struct task_struct *task)
514{
515 unsigned long flags;
516 lt_t now;
517 cedf_domain_t *cluster;
518
519 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
520
521 cluster = task_cpu_cluster(task);
522
523 spin_lock_irqsave(&cluster->lock, flags);
524 /* We need to take suspensions because of semaphores into
525 * account! If a job resumes after being suspended due to acquiring
526 * a semaphore, it should never be treated as a new job release.
527 */
528 if (get_rt_flags(task) == RT_F_EXIT_SEM) {
529 set_rt_flags(task, RT_F_RUNNING);
530 } else {
531 now = litmus_clock();
532 if (is_tardy(task, now)) {
533 /* new sporadic release */
534 release_at(task, now);
535 sched_trace_task_release(task);
536 }
537 else {
538 if (task->rt.time_slice) {
539 /* came back in time before deadline
540 */
541 set_rt_flags(task, RT_F_RUNNING);
542 }
543 }
544 }
545 cedf_job_arrival(task);
546 spin_unlock_irqrestore(&cluster->lock, flags);
547}
548
549static void cedf_task_block(struct task_struct *t)
550{
551 unsigned long flags;
552 cedf_domain_t *cluster;
553
554 TRACE_TASK(t, "block at %llu\n", litmus_clock());
555
556 cluster = task_cpu_cluster(t);
557
558 /* unlink if necessary */
559 spin_lock_irqsave(&cluster->lock, flags);
560 unlink(t);
561 spin_unlock_irqrestore(&cluster->lock, flags);
562
563 BUG_ON(!is_realtime(t));
564}
565
566
567static void cedf_task_exit(struct task_struct * t)
568{
569 unsigned long flags;
570 cedf_domain_t *cluster = task_cpu_cluster(t);
571
572 /* unlink if necessary */
573 spin_lock_irqsave(&cluster->lock, flags);
574 unlink(t);
575 if (tsk_rt(t)->scheduled_on != NO_CPU) {
576 cluster->cpus[tsk_rt(t)->scheduled_on]->scheduled = NULL;
577 tsk_rt(t)->scheduled_on = NO_CPU;
578 }
579 spin_unlock_irqrestore(&cluster->lock, flags);
580
581 BUG_ON(!is_realtime(t));
582 TRACE_TASK(t, "RIP\n");
583}
584
585static long cedf_admit_task(struct task_struct* tsk)
586{
587 return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL;
588}
589
590/* total number of cluster */
591static int num_clusters;
592/* we do not support cluster of different sizes */
593static unsigned int cluster_size;
594
#ifdef VERBOSE_INIT
/*
 * print_cluster_topology - log which cpus appear in @mask for @cpu.
 *
 * The mask is formatted as a cpu list into a local buffer and printed
 * with printk(). Only built when VERBOSE_INIT is defined.
 */
static void print_cluster_topology(cpumask_var_t mask, int cpu)
{
	char cpu_list[255];
	int written;

	/* leave the last byte free for the terminator written below */
	written = cpulist_scnprintf(cpu_list, 254, mask);
	cpu_list[written] = '\0';
	printk(KERN_INFO "CPU = %d, shared cpu(s) = %s\n", cpu, cpu_list);
}
#endif
607
608static int clusters_allocated = 0;
609
610static void cleanup_cedf(void)
611{
612 int i;
613
614 if (clusters_allocated) {
615 for (i = 0; i < num_clusters; i++) {
616 kfree(cedf[i].cpus);
617 kfree(cedf[i].heap_node);
618 free_cpumask_var(cedf[i].cpu_map);
619 }
620
621 kfree(cedf);
622 }
623}
624
625static long cedf_activate_plugin(void)
626{
627 int i, j, cpu, ccpu, cpu_count;
628 cpu_entry_t *entry;
629
630 cpumask_var_t mask;
631 int chk = 0;
632
633 /* de-allocate old clusters, if any */
634 cleanup_cedf();
635
636 printk(KERN_INFO "C-EDF: Activate Plugin, cache index = %d\n",
637 cluster_cache_index);
638
639 /* need to get cluster_size first */
640 if(!zalloc_cpumask_var(&mask, GFP_ATOMIC))
641 return -ENOMEM;
642
643 chk = get_shared_cpu_map(mask, 0, cluster_cache_index);
644 if (chk) {
645 /* if chk != 0 then it is the max allowed index */
646 printk(KERN_INFO "C-EDF: Cannot support cache index = %d\n",
647 cluster_cache_index);
648 printk(KERN_INFO "C-EDF: Using cache index = %d\n",
649 chk);
650 cluster_cache_index = chk;
651 }
652
653 cluster_size = cpumask_weight(mask);
654
655 if ((num_online_cpus() % cluster_size) != 0) {
656 /* this can't be right, some cpus are left out */
657 printk(KERN_ERR "C-EDF: Trying to group %d cpus in %d!\n",
658 num_online_cpus(), cluster_size);
659 return -1;
660 }
661
662 num_clusters = num_online_cpus() / cluster_size;
663 printk(KERN_INFO "C-EDF: %d cluster(s) of size = %d\n",
664 num_clusters, cluster_size);
665
666 /* initialize clusters */
667 cedf = kmalloc(num_clusters * sizeof(cedf_domain_t), GFP_ATOMIC);
668 for (i = 0; i < num_clusters; i++) {
669
670 cedf[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t),
671 GFP_ATOMIC);
672 cedf[i].heap_node = kmalloc(
673 cluster_size * sizeof(struct bheap_node),
674 GFP_ATOMIC);
675 bheap_init(&(cedf[i].cpu_heap));
676 edf_domain_init(&(cedf[i].domain), NULL, cedf_release_jobs);
677
678 if(!zalloc_cpumask_var(&cedf[i].cpu_map, GFP_ATOMIC))
679 return -ENOMEM;
680 }
681
682 /* cycle through cluster and add cpus to them */
683 for (i = 0; i < num_clusters; i++) {
684
685 for_each_online_cpu(cpu) {
686 /* check if the cpu is already in a cluster */
687 for (j = 0; j < num_clusters; j++)
688 if (cpumask_test_cpu(cpu, cedf[j].cpu_map))
689 break;
690 /* if it is in a cluster go to next cpu */
691 if (cpumask_test_cpu(cpu, cedf[j].cpu_map))
692 continue;
693
694 /* this cpu isn't in any cluster */
695 /* get the shared cpus */
696 get_shared_cpu_map(mask, cpu, cluster_cache_index);
697 cpumask_copy(cedf[i].cpu_map, mask);
698#ifdef VERBOSE_INIT
699 print_cluster_topology(mask, cpu);
700#endif
701 /* add cpus to current cluster and init cpu_entry_t */
702 cpu_count = 0;
703 for_each_cpu(ccpu, cedf[i].cpu_map) {
704
705 entry = &per_cpu(cedf_cpu_entries, ccpu);
706 cedf[i].cpus[cpu_count] = entry;
707 atomic_set(&entry->will_schedule, 0);
708 entry->cpu = ccpu;
709 entry->cluster = &cedf[i];
710 entry->hn = &(cedf[i].heap_node[cpu_count]);
711 bheap_node_init(&entry->hn, entry);
712
713 cpu_count++;
714
715 entry->linked = NULL;
716 entry->scheduled = NULL;
717 update_cpu_position(entry);
718 }
719 /* done with this cluster */
720 break;
721 }
722 }
723
724 free_cpumask_var(mask);
725 clusters_allocated = 1;
726 return 0;
727}
728
729/* Plugin object */
730static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = {
731 .plugin_name = "C-EDF",
732 .finish_switch = cedf_finish_switch,
733 .tick = cedf_tick,
734 .task_new = cedf_task_new,
735 .complete_job = complete_job,
736 .task_exit = cedf_task_exit,
737 .schedule = cedf_schedule,
738 .task_wake_up = cedf_task_wake_up,
739 .task_block = cedf_task_block,
740 .admit_task = cedf_admit_task,
741 .activate_plugin = cedf_activate_plugin,
742};
743
744
745static int __init init_cedf(void)
746{
747 return register_sched_plugin(&cedf_plugin);
748}
749
/* Module exit: free whatever the last plugin activation allocated. */
static void clean_cedf(void)
{
	cleanup_cedf();
	return;
}
754
755module_init(init_cedf);
756module_exit(clean_cedf);
diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c
index bc7c0e93fb18..3767b30e610a 100644
--- a/litmus/sched_plugin.c
+++ b/litmus/sched_plugin.c
@@ -171,6 +171,14 @@ struct sched_plugin linux_sched_plugin = {
171}; 171};
172 172
173/* 173/*
174 * The cluster size is needed in C-EDF: it makes sense only to cluster
175 * around L2 or L3, so if cluster_cache_index = 2 (default) we cluster
176 * all the CPUs that share an L2 cache, while if cluster_cache_index = 3
177 * we cluster all CPUs that share an L3 cache
178 */
179int cluster_cache_index = 2;
180
181/*
174 * The reference to current plugin that is used to schedule tasks within 182 * The reference to current plugin that is used to schedule tasks within
175 * the system. It stores references to actual function implementations 183 * the system. It stores references to actual function implementations
176 * Should be initialized by calling "init_***_plugin()" 184 * Should be initialized by calling "init_***_plugin()"