author	Jeremy Erickson <jerickso@cs.unc.edu>	2012-08-30 14:50:14 -0400
committer	Jeremy Erickson <jerickso@cs.unc.edu>	2012-08-30 14:50:14 -0400
commit	1583cb3ffb768cd93c1104ccc12562dd8eed7ba2 (patch)
tree	cf50e14012183653dbee706a45dd065b06be6007
parent	2d49efa8497aceda474122860b78e393838d2019 (diff)
Initial support for C-FL with splitting
-rw-r--r--	litmus/Makefile	5
-rw-r--r--	litmus/sanitizegfl.pl	11
-rw-r--r--	litmus/sched_cfl_split.c	1027
3 files changed, 1041 insertions, 2 deletions
diff --git a/litmus/Makefile b/litmus/Makefile
index 988cf7b6df89..b892f356f2be 100644
--- a/litmus/Makefile
+++ b/litmus/Makefile
@@ -11,7 +11,7 @@ obj-y = sched_plugin.o litmus.o \
 	sync.o \
 	rt_domain.o \
 	edf_common.o \
 	edf_split_common.o \
 	fdso.o \
 	locking.o \
 	srp.o \
@@ -19,7 +19,8 @@ obj-y = sched_plugin.o litmus.o \
 	ctrldev.o \
 	sched_gsn_edf.o \
 	sched_gsn_edf_split.o \
 	sched_gfl_split.o \
+	sched_cfl_split.o \
 	sched_psn_edf.o
 
 obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o
diff --git a/litmus/sanitizegfl.pl b/litmus/sanitizegfl.pl
new file mode 100644
index 000000000000..aa28811ae46c
--- /dev/null
+++ b/litmus/sanitizegfl.pl
@@ -0,0 +1,11 @@
1# A quick hack to allow diff to work between sched_gfl_split.c and
2# sched_gsn_edf.c
3open(INFILE, "<", "sched_gfl_split.c") or die "Cannot open sched_gfl_split.c: $!";
4open(OUTFILE, ">", "sched_gfl_split_namechange.c") or die "Cannot open output file: $!";
5while (my $line = <INFILE>){
6 $line =~ s/gflsplit/gsnedf/g;
7 $line =~ s/G-FL-split/GSN-EDF/g;
8 print OUTFILE $line;
9}
10close INFILE;
11close OUTFILE;
diff --git a/litmus/sched_cfl_split.c b/litmus/sched_cfl_split.c
new file mode 100644
index 000000000000..6783b4c94366
--- /dev/null
+++ b/litmus/sched_cfl_split.c
@@ -0,0 +1,1027 @@
1/*
2 * litmus/sched_cfl_split.c
3 *
4 * Implementation of a clustered version of the G-FL scheduling algorithm,
5 * with job splitting.
6 *
7 * This implementation is based on G-FL-split:
8 * - CPUs are clustered around L2 or L3 caches.
9 * - Cluster topology is automatically detected (this is arch-dependent
10 *   and currently works only on x86 --- and only with modern CPUs that
11 *   export cpuid4 information).
12 * - The plugin _does not_ attempt to put tasks in the right cluster, i.e.,
13 *   the programmer needs to be aware of the topology to place tasks
14 *   in the desired cluster.
15 * - Supported cluster configurations are L1 (private cache: pedf), L2, L3,
16 *   and ALL (all online CPUs are placed in a single cluster); the
17 *   compiled-in default is GLOBAL_CLUSTER (see cluster_config below).
18 *
19 * For details on functions, take a look at sched_gsn_edf.c
20 *
21 * Currently, we do not support changes in the number of online cpus.
22 * If num_online_cpus() changes dynamically, the plugin is broken.
23 *
24 * This version uses the simple approach and serializes all scheduling
25 * decisions by the use of a queue lock. This is probably not the
26 * best way to do it, but it should suffice for now.
27 */
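
/*
 * Illustrative note (not part of the original patch): the number of
 * clusters simply follows from the detected cluster size. For example,
 * assuming a hypothetical machine with 8 online CPUs where groups of 4
 * share a last-level cache, selecting that cache level yields
 * cluster_size = 4 and hence num_clusters = 8 / 4 = 2, whereas
 * GLOBAL_CLUSTER puts all 8 CPUs into a single cluster, which degenerates
 * to plain G-FL with splitting.
 */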
28
29#include <linux/spinlock.h>
30#include <linux/percpu.h>
31#include <linux/sched.h>
32#include <linux/slab.h>
33
34#include <linux/module.h>
35
36#include <litmus/litmus.h>
37#include <litmus/jobs.h>
38#include <litmus/preempt.h>
39#include <litmus/sched_plugin.h>
40#include <litmus/edf_split_common.h>
41#include <litmus/sched_trace.h>
42
43#include <litmus/clustered.h>
44
45#include <litmus/bheap.h>
46
47#ifdef CONFIG_SCHED_CPU_AFFINITY
48#include <litmus/affinity.h>
49#endif
50
51/* to configure the cluster size */
52#include <litmus/litmus_proc.h>
53#include <linux/uaccess.h>
54
55/* Reference configuration variable. Determines which cache level is used to
56 * group CPUs into clusters. GLOBAL_CLUSTER, which is the default, means that
57 * all CPUs form a single cluster (just like G-FL).
58 */
59static enum cache_level cluster_config = GLOBAL_CLUSTER;
60
61struct clusterdomain;
62
63/* cpu_entry_t - maintain the linked and scheduled state
64 *
65 * A cpu also contains a pointer to the cflsplit_domain_t cluster
66 * that owns it (struct clusterdomain*)
67 */
68typedef struct {
69 int cpu;
70 struct clusterdomain* cluster; /* owning cluster */
71 struct task_struct* linked; /* only RT tasks */
72 struct task_struct* scheduled; /* only RT tasks */
73 atomic_t will_schedule; /* prevent unneeded IPIs */
74 struct bheap_node* hn;
75 struct hrtimer split_timer;
76 int timer_armed;
77} cpu_entry_t;
78
79/* one cpu_entry_t per CPU */
80DEFINE_PER_CPU(cpu_entry_t, cflsplit_cpu_entries);
81
82#define set_will_schedule() \
83 (atomic_set(&__get_cpu_var(cflsplit_cpu_entries).will_schedule, 1))
84#define clear_will_schedule() \
85 (atomic_set(&__get_cpu_var(cflsplit_cpu_entries).will_schedule, 0))
86#define test_will_schedule(cpu) \
87 (atomic_read(&per_cpu(cflsplit_cpu_entries, cpu).will_schedule))
88
89/*
90 * In C-FL-split there is a cflsplit domain _per_ cluster
91 * The number of clusters is dynamically determined according to the
92 * total number of CPUs and the cluster size
93 */
94typedef struct clusterdomain {
95 /* rt_domain for this cluster */
96 rt_domain_t domain;
97 /* cpus in this cluster */
98 cpu_entry_t* *cpus;
99 /* map of this cluster cpus */
100 cpumask_var_t cpu_map;
101 /* the cpus queue themselves according to priority in here */
102 struct bheap_node *heap_node;
103 struct bheap cpu_heap;
104 /* lock for this cluster */
105#define cluster_lock domain.ready_lock
106} cflsplit_domain_t;
107
108/* a cflsplit_domain per cluster; allocation is done at init/activation time */
109cflsplit_domain_t *cflsplit;
110
111#define remote_cluster(cpu) ((cflsplit_domain_t *) per_cpu(cflsplit_cpu_entries, cpu).cluster)
112#define task_cpu_cluster(task) remote_cluster(get_partition(task))
113
114/* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling
115 * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose
116 * information during the initialization of the plugin (e.g., topology)
117#define WANT_ALL_SCHED_EVENTS
118 */
119#define VERBOSE_INIT
120
121inline static int get_slice_num(struct task_struct* t)
122{
123 int basic = ((t->rt_param.job_params.exec_time *
124 t->rt_param.task_params.split) /
125 t->rt_param.task_params.exec_cost) + 1;
126 if (basic <= t->rt_param.task_params.split){
127 return basic;
128 }
129 else{
130		/* Since we don't police the budget, just leave it where it is. */
131 return t->rt_param.task_params.split;
132 }
133}
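
/*
 * Illustrative example (not part of the original patch), assuming a
 * hypothetical task with exec_cost = 12ms and split = 3: at exec_time = 0
 * the job is in slice (0 * 3) / 12 + 1 = 1; after 4ms of execution it is
 * in slice (4 * 3) / 12 + 1 = 2; after 8ms it is in slice 3. Once
 * exec_time reaches the full 12ms, basic would be 4 > split, so the
 * result is clamped to split = 3.
 */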
134
135/* Returns the appropriate subjob deadline.*/
136inline static lt_t get_proper_deadline(struct task_struct* t)
137{
138 unsigned int num_cpus = num_online_cpus();
139 return t->rt_param.job_params.release +
140 ((t->rt_param.task_params.period * get_slice_num(t))
141 / t->rt_param.task_params.split)
142 /* G-FL correction */
143 - (((num_cpus - 1) * t->rt_param.task_params.exec_cost)
144 / (num_cpus * t->rt_param.task_params.split));
145}
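
/*
 * Illustrative example (not part of the original patch), continuing the
 * hypothetical task above (exec_cost = 12ms, period = 30ms, split = 3) on
 * num_cpus = 4: the G-FL correction term is
 * ((4 - 1) * 12ms) / (4 * 3) = 3ms, so the three subjob deadlines fall at
 * release + 10ms - 3ms, release + 20ms - 3ms, and release + 30ms - 3ms,
 * i.e., evenly spaced period/split deadlines shifted 3ms earlier.
 */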
146
147/* Tells us if the current deadline is too small.*/
148inline static int needs_deadline_move(struct task_struct* t)
149{
150 BUG_ON(get_proper_deadline(t) < t->rt_param.job_params.subjob_deadline);
151#ifdef CONFIG_LITMUS_LOCKING
152 return !is_in_crit_section(t) &&
153 (get_proper_deadline(t) !=
154 tsk_rt(t)->job_params.subjob_deadline);
155#else
156 return get_proper_deadline(t) != tsk_rt(t)->job_params.subjob_deadline;
157#endif
158}
159
160/* Returns the execution time until the next deadline move.
161 * 0 means the task has no more deadline moves.
162 */
163inline static lt_t time_to_next_move(struct task_struct* t)
164{
165 if (get_slice_num(t) == t->rt_param.task_params.split){
166 return 0;
167 }
168 /* +1 upper bounds ceiling, since integer division is floor*/
169 return ((get_slice_num(t) * t->rt_param.task_params.exec_cost)
170 / t->rt_param.task_params.split) + 1
171 - t->rt_param.job_params.exec_time;
172}
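
/*
 * Illustrative example (not part of the original patch), same hypothetical
 * parameters (exec_cost = 12ms, split = 3): while executing in slice 1,
 * the next deadline move is due after (1 * 12ms) / 3 + 1 - exec_time,
 * i.e., just over 4ms of further execution (the +1 upper-bounds the
 * ceiling). In the last slice the function returns 0, and
 * arm_split_timer() below then leaves the split timer unarmed.
 */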
173
174/* Timer stuff - similar to budget.c. */
175static enum hrtimer_restart on_split_timeout(struct hrtimer *timer)
176{
177 cpu_entry_t* st = container_of(timer,
178 cpu_entry_t,
179 split_timer);
180
181 unsigned long flags;
182
183 local_irq_save(flags);
184 TRACE("split timer fired.\n");
185 st->timer_armed = 0;
186 /* Activate scheduler */
187 litmus_reschedule_local();
188 local_irq_restore(flags);
189
190 return HRTIMER_NORESTART;
191}
192
193static void cancel_split_timer(cpu_entry_t* ce)
194{
195 int ret;
196
197	TRACE("cancelling split timer.\n");
198
199	/* Since interrupts are disabled and ce->timer_armed is only
200 * modified locally, we do not need any locks.
201 */
202
203 if (ce->timer_armed) {
204 ret = hrtimer_try_to_cancel(&ce->split_timer);
205 /* Should never be inactive. */
206 BUG_ON(ret == 0);
207 /* Should never be running concurrently.*/
208 BUG_ON(ret == -1);
209
210 ce->timer_armed = 0;
211 }
212}
213
214/* assumes called with IRQs off */
215static void arm_split_timer(cpu_entry_t *ce,
216 struct task_struct* t)
217{
218 lt_t when_to_fire;
219 lt_t time_to_move;
220 TRACE_TASK(t, "arming split timer.\n");
221
222 /* __hrtimer_start_range_ns() cancels the timer
223 * anyway, so we don't have to check whether it is still armed */
224
225	/* We won't do any new deadline moves if the budget has been exhausted. */
226 if (likely(!is_np(t) && (time_to_move = time_to_next_move(t)))) {
227 when_to_fire = litmus_clock() + time_to_move;
228 TRACE_TASK(t, "actually arming for %llu into the future\n",
229 time_to_move);
230 __hrtimer_start_range_ns(&ce->split_timer,
231 ns_to_ktime(when_to_fire),
232 0 /* delta */,
233 HRTIMER_MODE_ABS_PINNED,
234 0 /* no wakeup */);
235 ce->timer_armed = 1;
236 }
237}
238
239static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b)
240{
241 cpu_entry_t *a, *b;
242 a = _a->value;
243 b = _b->value;
244 /* Note that a and b are inverted: we want the lowest-priority CPU at
245 * the top of the heap.
246 */
247 return edf_split_higher_prio(b->linked, a->linked);
248}
249
250/* update_cpu_position - Move the cpu entry to the correct place to maintain
251 * order in the cpu queue. Caller must hold cflsplit lock.
252 */
253static void update_cpu_position(cpu_entry_t *entry)
254{
255 cflsplit_domain_t *cluster = entry->cluster;
256
257 if (likely(bheap_node_in_heap(entry->hn)))
258 bheap_delete(cpu_lower_prio,
259 &cluster->cpu_heap,
260 entry->hn);
261
262 bheap_insert(cpu_lower_prio, &cluster->cpu_heap, entry->hn);
263}
264
265/* caller must hold cflsplit lock */
266static cpu_entry_t* lowest_prio_cpu(cflsplit_domain_t *cluster)
267{
268 struct bheap_node* hn;
269 hn = bheap_peek(cpu_lower_prio, &cluster->cpu_heap);
270 return hn->value;
271}
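
/*
 * Illustrative note (not part of the original patch): because
 * cpu_lower_prio() inverts its arguments, the CPU whose linked task has
 * the lowest priority (latest subjob deadline, or no linked task at all)
 * ends up at the top of cpu_heap, so lowest_prio_cpu() directly yields the
 * preemption candidate used by check_for_preemptions() below.
 */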
272
273
274/* link_task_to_cpu - Update the link of a CPU.
275 * Handles the case where the to-be-linked task is already
276 * scheduled on a different CPU.
277 */
278static noinline void link_task_to_cpu(struct task_struct* linked,
279 cpu_entry_t *entry)
280{
281 cpu_entry_t *sched;
282 struct task_struct* tmp;
283 int on_cpu;
284
285 BUG_ON(linked && !is_realtime(linked));
286
287 /* Currently linked task is set to be unlinked. */
288 if (entry->linked) {
289 entry->linked->rt_param.linked_on = NO_CPU;
290 }
291
292 /* Link new task to CPU. */
293 if (linked) {
294 set_rt_flags(linked, RT_F_RUNNING);
295 /* handle task is already scheduled somewhere! */
296 on_cpu = linked->rt_param.scheduled_on;
297 if (on_cpu != NO_CPU) {
298 sched = &per_cpu(cflsplit_cpu_entries, on_cpu);
299 /* this should only happen if not linked already */
300 BUG_ON(sched->linked == linked);
301
302 /* If we are already scheduled on the CPU to which we
303 * wanted to link, we don't need to do the swap --
304 * we just link ourselves to the CPU and depend on
305 * the caller to get things right.
306 */
307 if (entry != sched) {
308 TRACE_TASK(linked,
309 "already scheduled on %d, updating link.\n",
310 sched->cpu);
311 tmp = sched->linked;
312 linked->rt_param.linked_on = sched->cpu;
313 sched->linked = linked;
314 update_cpu_position(sched);
315 linked = tmp;
316 }
317 }
318 if (linked) /* might be NULL due to swap */
319 linked->rt_param.linked_on = entry->cpu;
320 }
321 entry->linked = linked;
322#ifdef WANT_ALL_SCHED_EVENTS
323 if (linked)
324 TRACE_TASK(linked, "linked to %d.\n", entry->cpu);
325 else
326 TRACE("NULL linked to %d.\n", entry->cpu);
327#endif
328 update_cpu_position(entry);
329}
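
/*
 * Illustrative note (not part of the original patch): suppose task A is to
 * be linked to CPU 1 but is still scheduled on CPU 2, where task B is
 * currently linked. The swap above links A to CPU 2 (where it already
 * runs) and hands B back to this entry, so B becomes linked to CPU 1
 * instead; this avoids a pointless migration of A.
 */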
330
331/* unlink - Make sure a task is not linked any longer to an entry
332 * where it was linked before. Must hold cflsplit_lock.
333 */
334static noinline void unlink(struct task_struct* t)
335{
336 cpu_entry_t *entry;
337
338 if (t->rt_param.linked_on != NO_CPU) {
339 /* unlink */
340 entry = &per_cpu(cflsplit_cpu_entries, t->rt_param.linked_on);
341 t->rt_param.linked_on = NO_CPU;
342 link_task_to_cpu(NULL, entry);
343 } else if (is_queued(t)) {
344 /* This is an interesting situation: t is scheduled,
345 * but was just recently unlinked. It cannot be
346 * linked anywhere else (because then it would have
347 * been relinked to this CPU), thus it must be in some
348 * queue. We must remove it from the list in this
349 * case.
350 *
351		 * In the C-FL-split case it should be somewhere in the queue for
352		 * its domain; we can therefore get the domain using
353		 * task_cpu_cluster().
354 */
355 remove(&(task_cpu_cluster(t))->domain, t);
356 }
357}
358
359
360/* preempt - force a CPU to reschedule
361 */
362static void preempt(cpu_entry_t *entry)
363{
364 preempt_if_preemptable(entry->scheduled, entry->cpu);
365}
366
367/* requeue - Put an unlinked task into its cluster's domain.
368 * Caller must hold cflsplit_lock.
369 */
370static noinline void requeue(struct task_struct* task)
371{
372 cflsplit_domain_t *cluster = task_cpu_cluster(task);
373 BUG_ON(!task);
374 /* sanity check before insertion */
375 BUG_ON(is_queued(task));
376
377 if (is_released(task, litmus_clock()))
378 __add_ready(&cluster->domain, task);
379 else {
380 /* it has got to wait */
381 add_release(&cluster->domain, task);
382 }
383}
384
385#ifdef CONFIG_SCHED_CPU_AFFINITY
386static cpu_entry_t* cflsplit_get_nearest_available_cpu(
387 cflsplit_domain_t *cluster, cpu_entry_t *start)
388{
389 cpu_entry_t *affinity;
390
391 get_nearest_available_cpu(affinity, start, cflsplit_cpu_entries,
392#ifdef CONFIG_RELEASE_MASTER
393 cluster->domain.release_master
394#else
395 NO_CPU
396#endif
397 );
398
399 /* make sure CPU is in our cluster */
400 if (affinity && cpu_isset(affinity->cpu, *cluster->cpu_map))
401 return(affinity);
402 else
403 return(NULL);
404}
405#endif
406
407
408/* check for any necessary preemptions */
409static void check_for_preemptions(cflsplit_domain_t *cluster)
410{
411 struct task_struct *task;
412 cpu_entry_t *last;
413
414 for(last = lowest_prio_cpu(cluster);
415 edf_split_preemption_needed(&cluster->domain, last->linked);
416 last = lowest_prio_cpu(cluster)) {
417 /* preemption necessary */
418 task = __take_ready(&cluster->domain);
419 TRACE("check_for_preemptions: attempting to link task %d to %d\n",
420 task->pid, last->cpu);
421#ifdef CONFIG_SCHED_CPU_AFFINITY
422 {
423 cpu_entry_t *affinity =
424 cflsplit_get_nearest_available_cpu(cluster,
425 &per_cpu(cflsplit_cpu_entries, task_cpu(task)));
426 if(affinity)
427 last = affinity;
428 else if(last->linked)
429 requeue(last->linked);
430 }
431#else
432 if (last->linked)
433 requeue(last->linked);
434#endif
435 link_task_to_cpu(task, last);
436 preempt(last);
437 }
438}
439
440/* cflsplit_job_arrival: task is either resumed or released */
441static noinline void cflsplit_job_arrival(struct task_struct* task)
442{
443 cflsplit_domain_t *cluster = task_cpu_cluster(task);
444 BUG_ON(!task);
445
446 requeue(task);
447 check_for_preemptions(cluster);
448}
449
450static void cflsplit_release_jobs(rt_domain_t* rt, struct bheap* tasks)
451{
452 cflsplit_domain_t* cluster = container_of(rt, cflsplit_domain_t, domain);
453 unsigned long flags;
454
455 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
456
457 __merge_ready(&cluster->domain, tasks);
458 check_for_preemptions(cluster);
459
460 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
461}
462
463/* caller holds cflsplit_lock */
464static noinline void job_completion(struct task_struct *t, int forced)
465{
466 BUG_ON(!t);
467
468 sched_trace_task_completion(t, forced);
469
470 TRACE_TASK(t, "job_completion().\n");
471
472 /* set flags */
473 set_rt_flags(t, RT_F_SLEEP);
474 /* prepare for next period */
475 prepare_for_next_period(t);
476 /* We now also set the subjob deadline to what it should be for
477 * scheduling priority.
478 */
479 t->rt_param.job_params.subjob_deadline = get_proper_deadline(t);
480 if (is_released(t, litmus_clock()))
481 sched_trace_task_release(t);
482 /* unlink */
483 unlink(t);
484 /* requeue
485 * But don't requeue a blocking task. */
486 if (is_running(t))
487 cflsplit_job_arrival(t);
488}
489
490static void move_deadline(struct task_struct *t)
491{
492 tsk_rt(t)->job_params.subjob_deadline = get_proper_deadline(t);
493 /* Check if rescheduling needed with lower priority. */
494 unlink(t);
495 cflsplit_job_arrival(t);
496}
497
498/* cflsplit_tick - this function is called for every local timer
499 * interrupt.
500 *
501 * It checks whether the current task's budget is exhausted and, if so,
502 * requests a preemption (or an exit from a non-preemptive section).
503 */
504static void cflsplit_tick(struct task_struct* t)
505{
506 if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) {
507 if (!is_np(t)) {
508 /* np tasks will be preempted when they become
509 * preemptable again
510 */
511 litmus_reschedule_local();
512 set_will_schedule();
513 TRACE("cflsplit_scheduler_tick: "
514 "%d is preemptable "
515 " => FORCE_RESCHED\n", t->pid);
516 } else if (is_user_np(t)) {
517 TRACE("cflsplit_scheduler_tick: "
518 "%d is non-preemptable, "
519 "preemption delayed.\n", t->pid);
520 request_exit_np(t);
521 }
522 }
523}
524
525/* Getting schedule() right is a bit tricky. schedule() may not make any
526 * assumptions on the state of the current task since it may be called for a
527 * number of reasons. The reasons include a scheduler_tick() determined that it
528 * was necessary, because sys_exit_np() was called, because some Linux
529 * subsystem determined so, or even (in the worst case) because there is a bug
530 * hidden somewhere. Thus, we must take extreme care to determine what the
531 * current state is.
532 *
533 * The CPU could currently be scheduling a task (or not), be linked (or not).
534 *
535 * The following assertions for the scheduled task could hold:
536 *
537 * - !is_running(scheduled) // the job blocks
538 * - scheduled->timeslice == 0 // the job completed (forcefully)
539 * - get_rt_flag() == RT_F_SLEEP // the job completed (by syscall)
540 * - linked != scheduled // we need to reschedule (for any reason)
541 * - is_np(scheduled) // rescheduling must be delayed,
542 * sys_exit_np must be requested
543 *
544 * Any of these can occur together.
545 */
546static struct task_struct* cflsplit_schedule(struct task_struct * prev)
547{
548 cpu_entry_t* entry = &__get_cpu_var(cflsplit_cpu_entries);
549 cflsplit_domain_t *cluster = entry->cluster;
550 int out_of_time, sleep, preempt, np, exists, blocks, needs_move;
551 struct task_struct* next = NULL;
552
553#ifdef CONFIG_RELEASE_MASTER
554 /* Bail out early if we are the release master.
555 * The release master never schedules any real-time tasks.
556 */
557 if (unlikely(cluster->domain.release_master == entry->cpu)) {
558 sched_state_task_picked();
559 return NULL;
560 }
561#endif
562
563 raw_spin_lock(&cluster->cluster_lock);
564 clear_will_schedule();
565
566 /* sanity checking */
567 BUG_ON(entry->scheduled && entry->scheduled != prev);
568 BUG_ON(entry->scheduled && !is_realtime(prev));
569 BUG_ON(is_realtime(prev) && !entry->scheduled);
570
571 /* (0) Determine state */
572 exists = entry->scheduled != NULL;
573 blocks = exists && !is_running(entry->scheduled);
574 out_of_time = exists &&
575 budget_enforced(entry->scheduled) &&
576 budget_exhausted(entry->scheduled);
577 needs_move = exists && needs_deadline_move(entry->scheduled);
578 np = exists && is_np(entry->scheduled);
579 sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP;
580 preempt = entry->scheduled != entry->linked;
581
582#ifdef WANT_ALL_SCHED_EVENTS
583 TRACE_TASK(prev, "invoked cflsplit_schedule.\n");
584#endif
585
586 if (exists)
587 TRACE_TASK(prev,
588 "blocks:%d out_of_time:%d needs_move: %d np:%d"
589 " sleep:%d preempt:%d state:%d sig:%d\n",
590 blocks, out_of_time, needs_move, np, sleep, preempt,
591 prev->state, signal_pending(prev));
592 if (entry->linked && preempt)
593 TRACE_TASK(prev, "will be preempted by %s/%d\n",
594 entry->linked->comm, entry->linked->pid);
595
596
597 /* If a task blocks we have no choice but to reschedule.
598 */
599 if (blocks)
600 unlink(entry->scheduled);
601
602 /* Request a sys_exit_np() call if we would like to preempt but cannot.
603 * We need to make sure to update the link structure anyway in case
604 * that we are still linked. Multiple calls to request_exit_np() don't
605 * hurt.
606 *
607 * Job deadline moves handled similarly
608 */
609 if (np && (out_of_time || preempt || sleep)) {
610 unlink(entry->scheduled);
611 request_exit_np(entry->scheduled);
612 }
613 else if (np && needs_move) {
614 move_deadline(entry->scheduled);
615 }
616
617 /* Any task that is preemptable and either exhausts its execution
618 * budget or wants to sleep completes. We may have to reschedule after
619 * this. Don't do a job completion if we block (can't have timers running
620	 * for blocked jobs). Preemptions go first for the same reason.
621 */
622 if (!np && (out_of_time || sleep) && !blocks && !preempt)
623 job_completion(entry->scheduled, !sleep);
624 else if (!np && needs_move && !blocks && !preempt) {
625 move_deadline(entry->scheduled);
626 }
627
628 /* Link pending task if we became unlinked.
629 */
630 if (!entry->linked)
631 link_task_to_cpu(__take_ready(&cluster->domain), entry);
632
633 /* The final scheduling decision. Do we need to switch for some reason?
634 * If linked is different from scheduled, then select linked as next.
635 */
636 if ((!np || blocks) &&
637 entry->linked != entry->scheduled) {
638 /* Schedule a linked job? */
639 if (entry->linked) {
640 entry->linked->rt_param.scheduled_on = entry->cpu;
641 next = entry->linked;
642 }
643 if (entry->scheduled) {
644 /* not gonna be scheduled soon */
645 entry->scheduled->rt_param.scheduled_on = NO_CPU;
646 TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
647 }
648 } else
649 /* Only override Linux scheduler if we have a real-time task
650 * scheduled that needs to continue.
651 */
652 if (exists)
653 next = prev;
654
655 sched_state_task_picked();
656 raw_spin_unlock(&cluster->cluster_lock);
657
658 if (next) {
659 arm_split_timer(entry, next);
660 }
661 else if (entry->timer_armed) {
662 cancel_split_timer(entry);
663 }
664
665#ifdef WANT_ALL_SCHED_EVENTS
666 TRACE("cflsplit_lock released, next=0x%p\n", next);
667
668 if (next)
669 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
670 else if (exists && !next)
671 TRACE("becomes idle at %llu.\n", litmus_clock());
672#endif
673
674
675 return next;
676}
677
678
679/* _finish_switch - we just finished the switch away from prev
680 */
681static void cflsplit_finish_switch(struct task_struct *prev)
682{
683 cpu_entry_t* entry = &__get_cpu_var(cflsplit_cpu_entries);
684
685 entry->scheduled = is_realtime(current) ? current : NULL;
686#ifdef WANT_ALL_SCHED_EVENTS
687 TRACE_TASK(prev, "switched away from\n");
688#endif
689}
690
691
692static void cflsplit_release_at(struct task_struct *t, lt_t start)
693{
694 t->rt_param.job_params.deadline = start;
695 prepare_for_next_period(t);
696 t->rt_param.job_params.subjob_deadline = get_proper_deadline(t);
697 set_rt_flags(t, RT_F_RUNNING);
698}
699
700
701/* Prepare a task for running in RT mode
702 */
703static void cflsplit_task_new(struct task_struct * t, int on_rq, int running)
704{
705 unsigned long flags;
706 cpu_entry_t* entry;
707 cflsplit_domain_t* cluster;
708
709	TRACE("C-FL-split: task new %d\n", t->pid);
710
711 /* the cluster doesn't change even if t is running */
712 cluster = task_cpu_cluster(t);
713
714 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
715
716 /* setup job params */
717 cflsplit_release_at(t, litmus_clock());
718
719 if (running) {
720 entry = &per_cpu(cflsplit_cpu_entries, task_cpu(t));
721 BUG_ON(entry->scheduled);
722
723#ifdef CONFIG_RELEASE_MASTER
724 if (entry->cpu != cluster->domain.release_master) {
725#endif
726 entry->scheduled = t;
727 tsk_rt(t)->scheduled_on = task_cpu(t);
728#ifdef CONFIG_RELEASE_MASTER
729 } else {
730 /* do not schedule on release master */
731 preempt(entry); /* force resched */
732 tsk_rt(t)->scheduled_on = NO_CPU;
733 }
734#endif
735 } else {
736 t->rt_param.scheduled_on = NO_CPU;
737 }
738 t->rt_param.linked_on = NO_CPU;
739
740 cflsplit_job_arrival(t);
741 raw_spin_unlock_irqrestore(&(cluster->cluster_lock), flags);
742}
743
744static void cflsplit_task_wake_up(struct task_struct *task)
745{
746 unsigned long flags;
747 lt_t now;
748 cflsplit_domain_t *cluster;
749
750 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
751
752 cluster = task_cpu_cluster(task);
753
754 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
755 /* We need to take suspensions because of semaphores into
756 * account! If a job resumes after being suspended due to acquiring
757 * a semaphore, it should never be treated as a new job release.
758 */
759 if (get_rt_flags(task) == RT_F_EXIT_SEM) {
760 set_rt_flags(task, RT_F_RUNNING);
761 } else {
762 now = litmus_clock();
763 if (is_tardy(task, now)) {
764 /* new sporadic release */
765 cflsplit_release_at(task, now);
766 sched_trace_task_release(task);
767 }
768 else {
769 if (task->rt.time_slice) {
770 /* came back in time before deadline
771 */
772 set_rt_flags(task, RT_F_RUNNING);
773 }
774 }
775 }
776 cflsplit_job_arrival(task);
777 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
778}
779
780static void cflsplit_task_block(struct task_struct *t)
781{
782 unsigned long flags;
783 cflsplit_domain_t *cluster;
784
785 TRACE_TASK(t, "block at %llu\n", litmus_clock());
786
787 cluster = task_cpu_cluster(t);
788
789 /* unlink if necessary */
790 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
791 unlink(t);
792 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
793
794 BUG_ON(!is_realtime(t));
795}
796
797
798static void cflsplit_task_exit(struct task_struct * t)
799{
800 unsigned long flags;
801 cflsplit_domain_t *cluster = task_cpu_cluster(t);
802
803 /* unlink if necessary */
804 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
805 unlink(t);
806 if (tsk_rt(t)->scheduled_on != NO_CPU) {
807 cpu_entry_t *cpu;
808 cpu = &per_cpu(cflsplit_cpu_entries, tsk_rt(t)->scheduled_on);
809 cpu->scheduled = NULL;
810 tsk_rt(t)->scheduled_on = NO_CPU;
811 }
812 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
813
814 BUG_ON(!is_realtime(t));
815 TRACE_TASK(t, "RIP\n");
816}
817
818static long cflsplit_admit_task(struct task_struct* tsk)
819{
820 return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL;
821}
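
/*
 * Illustrative note (not part of the original patch): as with other
 * clustered plugins, admission only succeeds if the task is already
 * running on the CPU recorded in task_params.cpu, so userspace is expected
 * to migrate a task onto a CPU of the desired cluster before switching it
 * to real-time mode.
 */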
822
823/* total number of clusters */
824static int num_clusters;
825/* we do not support clusters of different sizes */
826static unsigned int cluster_size;
827
828#ifdef VERBOSE_INIT
829static void print_cluster_topology(cpumask_var_t mask, int cpu)
830{
831 int chk;
832 char buf[255];
833
834 chk = cpulist_scnprintf(buf, 254, mask);
835 buf[chk] = '\0';
836 printk(KERN_INFO "CPU = %d, shared cpu(s) = %s\n", cpu, buf);
837
838}
839#endif
840
841static int clusters_allocated = 0;
842
843static void cleanup_cflsplit(void)
844{
845 int i;
846
847 if (clusters_allocated) {
848 for (i = 0; i < num_clusters; i++) {
849 kfree(cflsplit[i].cpus);
850 kfree(cflsplit[i].heap_node);
851 free_cpumask_var(cflsplit[i].cpu_map);
852 }
853
854 kfree(cflsplit);
855 }
856}
857
858static long cflsplit_activate_plugin(void)
859{
860 int i, j, cpu, ccpu, cpu_count;
861 cpu_entry_t *entry;
862
863 cpumask_var_t mask;
864 int chk = 0;
865
866 /* de-allocate old clusters, if any */
867 cleanup_cflsplit();
868
869 printk(KERN_INFO "C-FL-split: Activate Plugin, cluster configuration = %d\n",
870 cluster_config);
871
872 /* need to get cluster_size first */
873 if(!zalloc_cpumask_var(&mask, GFP_ATOMIC))
874 return -ENOMEM;
875
876 if (unlikely(cluster_config == GLOBAL_CLUSTER)) {
877 cluster_size = num_online_cpus();
878 } else {
879 chk = get_shared_cpu_map(mask, 0, cluster_config);
880 if (chk) {
881 /* if chk != 0 then it is the max allowed index */
882 printk(KERN_INFO "C-FL-split: Cluster configuration = %d "
883 "is not supported on this hardware.\n",
884 cluster_config);
885 /* User should notice that the configuration failed, so
886 * let's bail out. */
887 return -EINVAL;
888 }
889
890 cluster_size = cpumask_weight(mask);
891 }
892
893 if ((num_online_cpus() % cluster_size) != 0) {
894 /* this can't be right, some cpus are left out */
895 printk(KERN_ERR "C-FL-split: Trying to group %d cpus in %d!\n",
896 num_online_cpus(), cluster_size);
897 return -1;
898 }
899
900 num_clusters = num_online_cpus() / cluster_size;
901 printk(KERN_INFO "C-FL-split: %d cluster(s) of size = %d\n",
902 num_clusters, cluster_size);
903
904 /* initialize clusters */
905 cflsplit = kmalloc(num_clusters * sizeof(cflsplit_domain_t), GFP_ATOMIC);
906 for (i = 0; i < num_clusters; i++) {
907
908 cflsplit[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t),
909 GFP_ATOMIC);
910 cflsplit[i].heap_node = kmalloc(
911 cluster_size * sizeof(struct bheap_node),
912 GFP_ATOMIC);
913 bheap_init(&(cflsplit[i].cpu_heap));
914 edf_split_domain_init(&(cflsplit[i].domain), NULL,
915 cflsplit_release_jobs);
916
917 if(!zalloc_cpumask_var(&cflsplit[i].cpu_map, GFP_ATOMIC))
918 return -ENOMEM;
919#ifdef CONFIG_RELEASE_MASTER
920 cflsplit[i].domain.release_master = atomic_read(&release_master_cpu);
921#endif
922 }
923
924	/* cycle through clusters and add cpus to them */
925 for (i = 0; i < num_clusters; i++) {
926
927 for_each_online_cpu(cpu) {
928 /* check if the cpu is already in a cluster */
929 for (j = 0; j < num_clusters; j++)
930 if (cpumask_test_cpu(cpu, cflsplit[j].cpu_map))
931 break;
932 /* if it is in a cluster go to next cpu */
933 if (j < num_clusters &&
934 cpumask_test_cpu(cpu, cflsplit[j].cpu_map))
935 continue;
936
937 /* this cpu isn't in any cluster */
938 /* get the shared cpus */
939 if (unlikely(cluster_config == GLOBAL_CLUSTER))
940 cpumask_copy(mask, cpu_online_mask);
941 else
942 get_shared_cpu_map(mask, cpu, cluster_config);
943
944 cpumask_copy(cflsplit[i].cpu_map, mask);
945#ifdef VERBOSE_INIT
946 print_cluster_topology(mask, cpu);
947#endif
948 /* add cpus to current cluster and init cpu_entry_t */
949 cpu_count = 0;
950 for_each_cpu(ccpu, cflsplit[i].cpu_map) {
951
952 entry = &per_cpu(cflsplit_cpu_entries, ccpu);
953 cflsplit[i].cpus[cpu_count] = entry;
954 atomic_set(&entry->will_schedule, 0);
955 entry->cpu = ccpu;
956 entry->cluster = &cflsplit[i];
957 entry->hn = &(cflsplit[i].heap_node[cpu_count]);
958 hrtimer_init(&entry->split_timer,
959 CLOCK_MONOTONIC,
960 HRTIMER_MODE_ABS);
961 entry->split_timer.function = on_split_timeout;
962 bheap_node_init(&entry->hn, entry);
963
964 cpu_count++;
965
966 entry->linked = NULL;
967 entry->scheduled = NULL;
968#ifdef CONFIG_RELEASE_MASTER
969 /* only add CPUs that should schedule jobs */
970 if (entry->cpu != entry->cluster->domain.release_master)
971#endif
972 update_cpu_position(entry);
973 }
974 /* done with this cluster */
975 break;
976 }
977 }
978
979 free_cpumask_var(mask);
980 clusters_allocated = 1;
981 return 0;
982}
983
984/* Plugin object */
985static struct sched_plugin cflsplit_plugin __cacheline_aligned_in_smp = {
986 .plugin_name = "C-FL-split",
987 .finish_switch = cflsplit_finish_switch,
988 .tick = cflsplit_tick,
989 .task_new = cflsplit_task_new,
990 .complete_job = complete_job,
991 .task_exit = cflsplit_task_exit,
992 .schedule = cflsplit_schedule,
993 .release_at = cflsplit_release_at,
994 .task_wake_up = cflsplit_task_wake_up,
995 .task_block = cflsplit_task_block,
996 .admit_task = cflsplit_admit_task,
997 .activate_plugin = cflsplit_activate_plugin,
998};
999
1000static struct proc_dir_entry *cluster_file = NULL, *cflsplit_dir = NULL;
1001
1002static int __init init_cflsplit(void)
1003{
1004 int err, fs;
1005
1006 err = register_sched_plugin(&cflsplit_plugin);
1007 if (!err) {
1008 fs = make_plugin_proc_dir(&cflsplit_plugin, &cflsplit_dir);
1009 if (!fs)
1010 cluster_file = create_cluster_file(cflsplit_dir, &cluster_config);
1011 else
1012 printk(KERN_ERR "Could not allocate C-FL-split procfs dir.\n");
1013 }
1014 return err;
1015}
1016
1017static void clean_cflsplit(void)
1018{
1019 cleanup_cflsplit();
1020 if (cluster_file)
1021 remove_proc_entry("cluster", cflsplit_dir);
1022 if (cflsplit_dir)
1023 remove_plugin_proc_dir(&cflsplit_plugin);
1024}
1025
1026module_init(init_cflsplit);
1027module_exit(clean_cflsplit);