author     Glenn Elliott <gelliott@cs.unc.edu>  2013-11-26 14:41:19 -0500
committer  Glenn Elliott <gelliott@cs.unc.edu>  2013-11-26 14:41:19 -0500
commit     dfe59a59caec43172bbf09fe3e2e15e9395e7e6b
tree       78378d2d1b5835500bf79b0d03665bc7cf7694f5
parent     b2b3e869e8d5fee88aabf001c09094b400450bac
Add C-FL scheduler plugin.
This patch adds a C-FL scheduler plugin. Original work by Jeremy Erickson, port to the latest LITMUS^RT by Namhoon Kim, and cleanup and commit by Glenn Elliott.
-rw-r--r--  include/litmus/edf_split_common.h |   25
-rw-r--r--  include/litmus/litmus.h           |    4
-rw-r--r--  include/litmus/rt_param.h         |    6
-rw-r--r--  litmus/Kconfig                    |   16
-rw-r--r--  litmus/Makefile                   |    1
-rw-r--r--  litmus/edf_split_common.c         |  106
-rw-r--r--  litmus/sched_cfl_split.c          | 1006
7 files changed, 1164 insertions(+), 0 deletions(-)
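
The split-job deadline assignment that this patch introduces (see get_proper_deadline() in litmus/sched_cfl_split.c below) can be exercised in isolation. The following stand-alone sketch is an illustration only, not part of the patch: it mirrors that arithmetic for hypothetical task parameters (C = 12 ms, T = 36 ms, split = 3, a 4-CPU cluster) and prints each subjob deadline relative to the job release.

/* subjob_deadline_demo.c -- illustration only; mirrors the arithmetic of
 * get_proper_deadline() from litmus/sched_cfl_split.c for made-up
 * parameters.  Times are in nanoseconds, as in LITMUS^RT.
 */
#include <stdio.h>

typedef unsigned long long lt_t;

/* Deadline of the k-th subjob (k = 1..split), relative to the job release:
 * k * (T / split) minus the G-FL correction ((m - 1) * C) / (m * split).
 */
static lt_t subjob_deadline(lt_t period, lt_t exec_cost,
			    unsigned int split, unsigned int num_cpus,
			    unsigned int k)
{
	return (period * k) / split
		- ((num_cpus - 1) * exec_cost) / (num_cpus * split);
}

int main(void)
{
	const lt_t C = 12000000ULL;	/* hypothetical: 12 ms */
	const lt_t T = 36000000ULL;	/* hypothetical: 36 ms */
	const unsigned int split = 3;	/* three subjobs per job */
	const unsigned int m = 4;	/* CPUs in the cluster */
	unsigned int k;

	for (k = 1; k <= split; k++)
		printf("subjob %u: deadline = release + %llu ns\n",
		       k, subjob_deadline(T, C, split, m, k));
	return 0;
}

With these numbers the subjob deadlines land 9 ms, 21 ms, and 33 ms after the release: the job's scheduling window is cut into equal slices, each pulled forward by the per-subjob G-FL term.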
diff --git a/include/litmus/edf_split_common.h b/include/litmus/edf_split_common.h
new file mode 100644
index 000000000000..4e7c0ce23c9d
--- /dev/null
+++ b/include/litmus/edf_split_common.h
@@ -0,0 +1,25 @@
1/*
2 * EDF common data structures and utility functions shared by EDF-based
3 * scheduler plugins that support job splitting
4 */
5
6/* CLEANUP: Add comments and make it less messy.
7 *
8 */
9
10#ifndef __UNC_EDF_SPLIT_COMMON_H__
11#define __UNC_EDF_SPLIT_COMMON_H__
12
13#include <litmus/rt_domain.h>
14
15void edf_split_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
16 release_jobs_t release);
17
18int edf_split_higher_prio(struct task_struct* first,
19 struct task_struct* second);
20
21int edf_split_ready_order(struct bheap_node* a, struct bheap_node* b);
22
23int edf_split_preemption_needed(rt_domain_t* rt, struct task_struct *t);
24
25#endif
diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h
index e35c38c4c0a2..c240d9c07169 100644
--- a/include/litmus/litmus.h
+++ b/include/litmus/litmus.h
@@ -67,6 +67,7 @@ void litmus_exit_task(struct task_struct *tsk);
 /* job_param macros */
 #define get_exec_time(t) (tsk_rt(t)->job_params.exec_time)
 #define get_deadline(t) (tsk_rt(t)->job_params.deadline)
+#define get_subjob_deadline(t) (tsk_rt(t)->job_params.subjob_deadline)
 #define get_release(t) (tsk_rt(t)->job_params.release)
 #define get_lateness(t) (tsk_rt(t)->job_params.lateness)
 
@@ -118,6 +119,9 @@ static inline lt_t litmus_clock(void)
 #define earlier_release(a, b)  (lt_before(\
 	(a)->rt_param.job_params.release,\
 	(b)->rt_param.job_params.release))
+#define earlier_subjob_deadline(a, b)  (lt_before(\
+	(a)->rt_param.job_params.subjob_deadline,\
+	(b)->rt_param.job_params.subjob_deadline))
 
 void preempt_if_preemptable(struct task_struct* t, int on_cpu);
 
diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
index fe4b31320ac8..6160a1635227 100644
--- a/include/litmus/rt_param.h
+++ b/include/litmus/rt_param.h
@@ -76,6 +76,7 @@ struct rt_task {
 	lt_t period;
 	lt_t relative_deadline;
 	lt_t phase;
+	int split;
 	unsigned int cpu;
 	unsigned int priority;
 	task_class_t cls;
@@ -149,6 +150,11 @@ struct rt_job {
 	/* What is the current deadline? */
 	lt_t deadline;
 
+#ifdef CONFIG_JOB_SPLITTING
+	/* What is the deadline of the current subjob under splitting? */
+	lt_t subjob_deadline;
+#endif
+
 	/* How much service has this job received so far? */
 	lt_t exec_time;
 
diff --git a/litmus/Kconfig b/litmus/Kconfig
index 11f2801a943f..49017b26d0d3 100644
--- a/litmus/Kconfig
+++ b/litmus/Kconfig
@@ -12,6 +12,15 @@ config PLUGIN_CEDF
 	  On smaller platforms (e.g., ARM PB11MPCore), using C-EDF
 	  makes little sense since there aren't any shared caches.
 
+config PLUGIN_CFL
+	bool "Clustered-Fair-Lateness"
+	depends on X86 && SYSFS && JOB_SPLITTING
+	default n
+	help
+	  Include the Clustered Fair Lateness (C-FL) plugin in the kernel.
+	  This implements Anderson and Erickson's EDF-based scheduler.
+	  Supports job splitting.
+
 config PLUGIN_PFAIR
 	bool "PFAIR"
 	depends on HIGH_RES_TIMERS && HZ_PERIODIC && HZ = "1000"
@@ -26,6 +35,13 @@ config PLUGIN_PFAIR
 
 	  If unsure, say Yes.
 
+config JOB_SPLITTING
+	bool "Job Splitting"
+	default n
+	help
+	  Enable job-splitting features for fair-lateness schedulers, such
+	  as C-FL.
+
 config RELEASE_MASTER
 	bool "Release-master Support"
 	depends on ARCH_HAS_SEND_PULL_TIMERS && SMP
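
Because PLUGIN_CFL depends on the new JOB_SPLITTING option (in addition to X86 and SYSFS), both symbols have to be enabled for the plugin to be built. A minimal .config fragment for that (assuming the remaining LITMUS^RT and architecture dependencies are already satisfied) would read:

CONFIG_JOB_SPLITTING=y
CONFIG_PLUGIN_CFL=y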
diff --git a/litmus/Makefile b/litmus/Makefile
index 52f407cad77c..6c83cf1734ba 100644
--- a/litmus/Makefile
+++ b/litmus/Makefile
@@ -25,6 +25,7 @@ obj-y = sched_plugin.o litmus.o \
 
 obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o
 obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o
+obj-$(CONFIG_PLUGIN_CFL) += sched_cfl_split.o edf_split_common.o
 obj-$(CONFIG_SCHED_CPU_AFFINITY) += affinity.o
 obj-$(CONFIG_SCHED_PGM) += pgm.o
 
diff --git a/litmus/edf_split_common.c b/litmus/edf_split_common.c
new file mode 100644
index 000000000000..7d7f5429e4df
--- /dev/null
+++ b/litmus/edf_split_common.c
@@ -0,0 +1,106 @@
1/*
2 * kernel/edf_split_common.c
3 *
4 * Common functions for EDF based scheduler with split jobs.
5 */
6
7#include <linux/percpu.h>
8#include <linux/sched.h>
9#include <linux/list.h>
10
11#include <litmus/litmus.h>
12#include <litmus/sched_plugin.h>
13#include <litmus/sched_trace.h>
14
15#include <litmus/edf_split_common.h>
16#include <litmus/edf_common.h>
17
18/* edf_split_higher_prio - returns true if first has a higher subjob
19 * EDF priority than second.
20 *
21 * both first and second may be NULL
22 */
23int edf_split_higher_prio(struct task_struct* first,
24 struct task_struct* second)
25{
26 struct task_struct *first_task = first;
27 struct task_struct *second_task = second;
28
29 /* There is no point in comparing a task to itself. */
30 if (first && first == second) {
31 TRACE_TASK(first,
32 "WARNING: pointless edf priority comparison.\n");
33 return 0;
34 }
35
36 /* check for NULL tasks */
37 if (!first || !second)
38 return first && !second;
39
40#ifdef CONFIG_LITMUS_LOCKING
41
42 /* Check for inherited priorities. Change task
43 * used for comparison in such a case.
44 */
45 if (unlikely(first->rt_param.inh_task))
46 first_task = first->rt_param.inh_task;
47 if (unlikely(second->rt_param.inh_task))
48 second_task = second->rt_param.inh_task;
49
50 /* Check for priority boosting. Tie-break by start of boosting.
51 */
52 if (unlikely(is_priority_boosted(first_task))) {
53 /* first_task is boosted, how about second_task? */
54 if (!is_priority_boosted(second_task) ||
55 lt_before(get_boost_start(first_task),
56 get_boost_start(second_task)))
57 return 1;
58 else
59 return 0;
60 } else if (unlikely(is_priority_boosted(second_task)))
61 /* second_task is boosted, first is not*/
62 return 0;
63#endif
64
65 if (earlier_subjob_deadline(first_task, second_task)) {
66 return 1;
67 }
68 else if (get_subjob_deadline(first_task) == get_subjob_deadline(second_task)) {
69 /* use normal edf to tie-break */
70 return edf_higher_prio(first, second);
71 }
72 return 0; /* fall-through. prio(second_task) > prio(first_task) */
73}
74
75int edf_split_ready_order(struct bheap_node* a, struct bheap_node* b)
76{
77 return edf_split_higher_prio(bheap2task(a), bheap2task(b));
78}
79
80void edf_split_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
81 release_jobs_t release)
82{
83 rt_domain_init(rt, edf_split_ready_order, resched, release);
84}
85
86/* need_to_preempt - check whether the task t needs to be preempted
87 * call only with irqs disabled and with ready_lock acquired
88 * THIS DOES NOT TAKE NON-PREEMPTIVE SECTIONS INTO ACCOUNT!
89 */
90int edf_split_preemption_needed(rt_domain_t* rt, struct task_struct *t)
91{
92 /* we need the read lock for edf_ready_queue */
93 /* no need to preempt if there is nothing pending */
94 if (!__jobs_pending(rt))
95 return 0;
96 /* we need to reschedule if t doesn't exist */
97 if (!t)
98 return 1;
99
100 /* NOTE: We cannot check for non-preemptibility since we
101 * don't know what address space we're currently in.
102 */
103
104 /* make sure to get non-rt stuff out of the way */
105 return !is_realtime(t) || edf_split_higher_prio(__next_ready(rt), t);
106}
diff --git a/litmus/sched_cfl_split.c b/litmus/sched_cfl_split.c
new file mode 100644
index 000000000000..7d9302eb296b
--- /dev/null
+++ b/litmus/sched_cfl_split.c
@@ -0,0 +1,1006 @@
1/*
2 * litmus/sched_cfl_split.c
3 *
4 * Implementation of a clustered version of the C-FL scheduling algorithm,
5 * with job splitting.
6 *
7 * This implementation is based on C-FL-split:
8 * - CPUs are clustered around L2 or L3 caches.
9 * - Cluster topology is automatically detected (this is arch dependent
10 * and works only on x86 at the moment --- and only with modern
11 * CPUs that export cpuid4 information)
12 * - The plugin _does not_ attempt to put tasks in the right cluster, i.e.,
13 * the programmer needs to be aware of the topology to place tasks
14 * in the desired cluster
15 * - default clustering is around the L2 cache (cache index = 2);
16 * supported clusters are: L1 (private cache: pedf), L2, L3, ALL (all
17 * online_cpus are placed in a single cluster).
18 *
19 * For details on functions, take a look at sched_gsn_edf.c
20 *
21 * Currently, we do not support changes in the number of online cpus.
22 * If the num_online_cpus() dynamically changes, the plugin is broken.
23 *
24 * This version uses the simple approach and serializes all scheduling
25 * decisions by the use of a queue lock. This is probably not the
26 * best way to do it, but it should suffice for now.
27 */
28
29#include <linux/spinlock.h>
30#include <linux/percpu.h>
31#include <linux/sched.h>
32#include <linux/slab.h>
33
34#include <linux/module.h>
35
36#include <litmus/litmus.h>
37#include <litmus/jobs.h>
38#include <litmus/preempt.h>
39#include <litmus/budget.h>
40#include <litmus/sched_plugin.h>
41#include <litmus/edf_split_common.h>
42#include <litmus/sched_trace.h>
43
44#include <litmus/clustered.h>
45
46#include <litmus/bheap.h>
47
48#ifdef CONFIG_SCHED_CPU_AFFINITY
49#include <litmus/affinity.h>
50#endif
51
52/* to configure the cluster size */
53#include <litmus/litmus_proc.h>
54#include <linux/uaccess.h>
55
56/* Reference configuration variable. Determines which cache level is used to
57 * group CPUs into clusters. GLOBAL_CLUSTER, which is the default, means that
58 * all CPUs form a single cluster (just like G-FL).
59 */
60static enum cache_level cluster_config = GLOBAL_CLUSTER;
61
62struct clusterdomain;
63
64/* cpu_entry_t - maintain the linked and scheduled state
65 *
66 * A cpu also contains a pointer to the cflsplit_domain_t cluster
67 * that owns it (struct clusterdomain*)
68 */
69typedef struct {
70 int cpu;
71 struct clusterdomain* cluster; /* owning cluster */
72 struct task_struct* linked; /* only RT tasks */
73 struct task_struct* scheduled; /* only RT tasks */
74 atomic_t will_schedule; /* prevent unneeded IPIs */
75 struct bheap_node* hn;
76 struct hrtimer split_timer;
77 int timer_armed;
78} cpu_entry_t;
79
80/* one cpu_entry_t per CPU */
81DEFINE_PER_CPU(cpu_entry_t, cflsplit_cpu_entries);
82
83#define set_will_schedule() \
84 (atomic_set(&__get_cpu_var(cflsplit_cpu_entries).will_schedule, 1))
85#define clear_will_schedule() \
86 (atomic_set(&__get_cpu_var(cflsplit_cpu_entries).will_schedule, 0))
87#define test_will_schedule(cpu) \
88 (atomic_read(&per_cpu(cflsplit_cpu_entries, cpu).will_schedule))
89
90/*
91 * In C-FL-split there is a cflsplit domain _per_ cluster
92 * The number of clusters is dynamically determined accordingly to the
93 * total cpu number and the cluster size
94 */
95typedef struct clusterdomain {
96 /* rt_domain for this cluster */
97 rt_domain_t domain;
98 /* cpus in this cluster */
99 cpu_entry_t* *cpus;
100 /* map of this cluster cpus */
101 cpumask_var_t cpu_map;
102 /* the cpus queue themselves according to priority in here */
103 struct bheap_node *heap_node;
104 struct bheap cpu_heap;
105 /* lock for this cluster */
106#define cluster_lock domain.ready_lock
107} cflsplit_domain_t;
108
109/* a cflsplit_domain per cluster; allocation is done at init/activation time */
110cflsplit_domain_t *cflsplit;
111
112#define remote_cluster(cpu) ((cflsplit_domain_t *) per_cpu(cflsplit_cpu_entries, cpu).cluster)
113#define task_cpu_cluster(task) remote_cluster(get_partition(task))
114
115/* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling
116 * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose
117 * information during the initialization of the plugin (e.g., topology)
118#define WANT_ALL_SCHED_EVENTS
119 */
120#define VERBOSE_INIT
121
122inline static int get_slice_num(struct task_struct* t)
123{
124 int basic = ((t->rt_param.job_params.exec_time *
125 t->rt_param.task_params.split) /
126 t->rt_param.task_params.exec_cost) + 1;
127 if (basic <= t->rt_param.task_params.split){
128 return basic;
129 }
130 else{
131	/* Since we don't police budget, just leave it where it is. */
132 return t->rt_param.task_params.split;
133 }
134}
135
136/* Returns the appropriate subjob deadline.*/
137inline static lt_t get_proper_deadline(struct task_struct* t)
138{
139 unsigned int num_cpus = num_online_cpus();
140 return t->rt_param.job_params.release +
141 ((t->rt_param.task_params.period * get_slice_num(t))
142 / t->rt_param.task_params.split)
143 /* G-FL correction */
144 - (((num_cpus - 1) * t->rt_param.task_params.exec_cost)
145 / (num_cpus * t->rt_param.task_params.split));
146}
147
148/* Tells us if the current deadline is too small.*/
149inline static int needs_deadline_move(struct task_struct* t)
150{
151 BUG_ON(get_proper_deadline(t) < t->rt_param.job_params.subjob_deadline);
152 return get_proper_deadline(t) != tsk_rt(t)->job_params.subjob_deadline;
153}
154
155/*Returns execution time until the next deadline move.
156 * 0 means the task has no more deadline moves
157 */
158inline static lt_t time_to_next_move(struct task_struct* t)
159{
160 if (get_slice_num(t) == t->rt_param.task_params.split){
161 return 0;
162 }
163 /* +1 upper bounds ceiling, since integer division is floor*/
164 return ((get_slice_num(t) * t->rt_param.task_params.exec_cost)
165 / t->rt_param.task_params.split) + 1
166 - t->rt_param.job_params.exec_time;
167}
168
169/* Timer stuff - similar to budget.c. */
170static enum hrtimer_restart on_split_timeout(struct hrtimer *timer)
171{
172 cpu_entry_t* st = container_of(timer,
173 cpu_entry_t,
174 split_timer);
175
176 unsigned long flags;
177
178 local_irq_save(flags);
179 TRACE("split timer fired: %llu\n", litmus_clock());
180 st->timer_armed = 0;
181 /* Activate scheduler */
182 litmus_reschedule_local();
183 local_irq_restore(flags);
184
185 return HRTIMER_NORESTART;
186}
187
188static void cancel_split_timer(cpu_entry_t* ce)
189{
190 int ret;
191
192	TRACE("cancelling split timer.\n");
193
194	/* Since interrupts are disabled and ce->timer_armed is only
195 * modified locally, we do not need any locks.
196 */
197
198 if (ce->timer_armed) {
199 ret = hrtimer_try_to_cancel(&ce->split_timer);
200 /* Should never be inactive. */
201 BUG_ON(ret == 0);
202 /* Should never be running concurrently.*/
203 BUG_ON(ret == -1);
204
205 ce->timer_armed = 0;
206 }
207}
208
209/* assumes called with IRQs off */
210static void arm_split_timer(cpu_entry_t *ce,
211 struct task_struct* t)
212{
213 lt_t when_to_fire;
214 lt_t time_to_move;
215 lt_t now = litmus_clock();
216
217 /* __hrtimer_start_range_ns() cancels the timer
218 * anyway, so we don't have to check whether it is still armed */
219
220 /*We won't do any new deadline moves if the budget has been exhausted*/
221 if (likely(!is_np(t) && (time_to_move = time_to_next_move(t)))) {
222 when_to_fire = now + time_to_move;
223 TRACE_TASK(t, "actually arming for %llu into the future\n",
224 time_to_move);
225 __hrtimer_start_range_ns(&ce->split_timer,
226 ns_to_ktime(when_to_fire),
227 0 /* delta */,
228 HRTIMER_MODE_ABS_PINNED,
229 0 /* no wakeup */);
230 ce->timer_armed = 1;
231 }
232}
233
234static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b)
235{
236 cpu_entry_t *a, *b;
237 a = _a->value;
238 b = _b->value;
239 /* Note that a and b are inverted: we want the lowest-priority CPU at
240 * the top of the heap.
241 */
242 return edf_split_higher_prio(b->linked, a->linked);
243}
244
245/* update_cpu_position - Move the cpu entry to the correct place to maintain
246 * order in the cpu queue. Caller must hold cflsplit lock.
247 */
248static void update_cpu_position(cpu_entry_t *entry)
249{
250 cflsplit_domain_t *cluster = entry->cluster;
251
252 if (likely(bheap_node_in_heap(entry->hn)))
253 bheap_delete(cpu_lower_prio,
254 &cluster->cpu_heap,
255 entry->hn);
256
257 bheap_insert(cpu_lower_prio, &cluster->cpu_heap, entry->hn);
258}
259
260/* caller must hold cflsplit lock */
261static cpu_entry_t* lowest_prio_cpu(cflsplit_domain_t *cluster)
262{
263 struct bheap_node* hn;
264 hn = bheap_peek(cpu_lower_prio, &cluster->cpu_heap);
265 return hn->value;
266}
267
268
269/* link_task_to_cpu - Update the link of a CPU.
270 * Handles the case where the to-be-linked task is already
271 * scheduled on a different CPU.
272 */
273static noinline void link_task_to_cpu(struct task_struct* linked,
274 cpu_entry_t *entry)
275{
276 cpu_entry_t *sched;
277 struct task_struct* tmp;
278 int on_cpu;
279
280 BUG_ON(linked && !is_realtime(linked));
281
282 /* Currently linked task is set to be unlinked. */
283 if (entry->linked) {
284 entry->linked->rt_param.linked_on = NO_CPU;
285 }
286
287 /* Link new task to CPU. */
288 if (linked) {
289 /* handle task is already scheduled somewhere! */
290 on_cpu = linked->rt_param.scheduled_on;
291 if (on_cpu != NO_CPU) {
292 sched = &per_cpu(cflsplit_cpu_entries, on_cpu);
293 /* this should only happen if not linked already */
294 BUG_ON(sched->linked == linked);
295
296 /* If we are already scheduled on the CPU to which we
297 * wanted to link, we don't need to do the swap --
298 * we just link ourselves to the CPU and depend on
299 * the caller to get things right.
300 */
301 if (entry != sched) {
302 TRACE_TASK(linked,
303 "already scheduled on %d, updating link.\n",
304 sched->cpu);
305 tmp = sched->linked;
306 linked->rt_param.linked_on = sched->cpu;
307 sched->linked = linked;
308 update_cpu_position(sched);
309 linked = tmp;
310 }
311 }
312 if (linked) /* might be NULL due to swap */
313 linked->rt_param.linked_on = entry->cpu;
314 }
315 entry->linked = linked;
316#ifdef WANT_ALL_SCHED_EVENTS
317 if (linked)
318 TRACE_TASK(linked, "linked to %d.\n", entry->cpu);
319 else
320 TRACE("NULL linked to %d.\n", entry->cpu);
321#endif
322 update_cpu_position(entry);
323}
324
325/* unlink - Make sure a task is not linked any longer to an entry
326 * where it was linked before. Must hold cflsplit_lock.
327 */
328static noinline void unlink(struct task_struct* t)
329{
330 cpu_entry_t *entry;
331
332 if (t->rt_param.linked_on != NO_CPU) {
333 /* unlink */
334 entry = &per_cpu(cflsplit_cpu_entries, t->rt_param.linked_on);
335 t->rt_param.linked_on = NO_CPU;
336 link_task_to_cpu(NULL, entry);
337 } else if (is_queued(t)) {
338 /* This is an interesting situation: t is scheduled,
339 * but was just recently unlinked. It cannot be
340 * linked anywhere else (because then it would have
341 * been relinked to this CPU), thus it must be in some
342 * queue. We must remove it from the list in this
343 * case.
344 *
345 * in the C-FL-split case it should be somewhere in the queue for
346 * its domain, therefore we can get the domain using
347 * task_cpu_cluster
348 */
349 remove(&(task_cpu_cluster(t))->domain, t);
350 }
351}
352
353
354/* preempt - force a CPU to reschedule
355 */
356static void preempt(cpu_entry_t *entry)
357{
358 preempt_if_preemptable(entry->scheduled, entry->cpu);
359}
360
361/* requeue - Put an unlinked task into its cluster's cflsplit domain.
362 * Caller must hold cflsplit_lock.
363 */
364static noinline void requeue(struct task_struct* task)
365{
366 cflsplit_domain_t *cluster = task_cpu_cluster(task);
367 BUG_ON(!task);
368 /* sanity check before insertion */
369 BUG_ON(is_queued(task));
370
371 if (is_early_releasing(task) || is_released(task, litmus_clock()))
372 __add_ready(&cluster->domain, task);
373 else {
374 /* it has got to wait */
375 add_release(&cluster->domain, task);
376 }
377}
378
379#ifdef CONFIG_SCHED_CPU_AFFINITY
380static cpu_entry_t* cflsplit_get_nearest_available_cpu(
381 cflsplit_domain_t *cluster, cpu_entry_t *start)
382{
383 cpu_entry_t *affinity;
384
385 get_nearest_available_cpu(affinity, start, cflsplit_cpu_entries,
386#ifdef CONFIG_RELEASE_MASTER
387 cluster->domain.release_master
388#else
389 NO_CPU
390#endif
391 );
392
393 /* make sure CPU is in our cluster */
394 if (affinity && cpu_isset(affinity->cpu, *cluster->cpu_map))
395 return(affinity);
396 else
397 return(NULL);
398}
399#endif
400
401
402/* check for any necessary preemptions */
403static void check_for_preemptions(cflsplit_domain_t *cluster)
404{
405 struct task_struct *task;
406 cpu_entry_t *last;
407
408 for(last = lowest_prio_cpu(cluster);
409 edf_split_preemption_needed(&cluster->domain, last->linked);
410 last = lowest_prio_cpu(cluster)) {
411 /* preemption necessary */
412 task = __take_ready(&cluster->domain);
413 TRACE("check_for_preemptions: attempting to link task %d to %d\n",
414 task->pid, last->cpu);
415#ifdef CONFIG_SCHED_CPU_AFFINITY
416 {
417 cpu_entry_t *affinity =
418 cflsplit_get_nearest_available_cpu(cluster,
419 &per_cpu(cflsplit_cpu_entries, task_cpu(task)));
420 if(affinity)
421 last = affinity;
422 else if(requeue_preempted_job(last->linked))
423 requeue(last->linked);
424 }
425#else
426 if (requeue_preempted_job(last->linked))
427 requeue(last->linked);
428#endif
429 link_task_to_cpu(task, last);
430 preempt(last);
431 }
432}
433
434/* cflsplit_job_arrival: task is either resumed or released */
435static noinline void cflsplit_job_arrival(struct task_struct* task)
436{
437 cflsplit_domain_t *cluster = task_cpu_cluster(task);
438 BUG_ON(!task);
439
440 requeue(task);
441 check_for_preemptions(cluster);
442}
443
444static void cflsplit_release_jobs(rt_domain_t* rt, struct bheap* tasks)
445{
446 cflsplit_domain_t* cluster = container_of(rt, cflsplit_domain_t, domain);
447 unsigned long flags;
448
449 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
450
451 __merge_ready(&cluster->domain, tasks);
452 check_for_preemptions(cluster);
453
454 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
455}
456
457/* caller holds cflsplit_lock */
458static noinline void job_completion(struct task_struct *t, int forced)
459{
460 BUG_ON(!t);
461
462 sched_trace_task_completion(t, forced);
463
464 TRACE_TASK(t, "job_completion().\n");
465
466 /* set flags */
467 tsk_rt(t)->completed = 0;
468 /* prepare for next period */
469 prepare_for_next_period(t);
470 /* We now also set the subjob deadline to what it should be for
471 * scheduling priority.
472 */
473 t->rt_param.job_params.subjob_deadline = get_proper_deadline(t);
474 if (is_early_releasing(t) || is_released(t, litmus_clock()))
475 sched_trace_task_release(t);
476 /* unlink */
477 unlink(t);
478 /* requeue
479 * But don't requeue a blocking task. */
480 if (is_running(t))
481 cflsplit_job_arrival(t);
482}
483
484static void move_deadline(struct task_struct *t)
485{
486 tsk_rt(t)->job_params.subjob_deadline = get_proper_deadline(t);
487 /* Check if rescheduling needed with lower priority. */
488 unlink(t);
489 cflsplit_job_arrival(t);
490}
491
492/* cflsplit_tick - this function is called for every local timer
493 * interrupt.
494 *
495 * checks whether the current task has expired and checks
496 * whether we need to preempt it if it has not expired
497 */
498static void cflsplit_tick(struct task_struct* t)
499{
500 if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) {
501 if (!is_np(t)) {
502 /* np tasks will be preempted when they become
503 * preemptable again
504 */
505 litmus_reschedule_local();
506 set_will_schedule();
507 TRACE("cflsplit_scheduler_tick: "
508 "%d is preemptable "
509 " => FORCE_RESCHED\n", t->pid);
510 } else if (is_user_np(t)) {
511 TRACE("cflsplit_scheduler_tick: "
512 "%d is non-preemptable, "
513 "preemption delayed.\n", t->pid);
514 request_exit_np(t);
515 }
516 }
517}
518
519/* Getting schedule() right is a bit tricky. schedule() may not make any
520 * assumptions on the state of the current task since it may be called for a
521 * number of reasons. The reasons include a scheduler_tick() determined that it
522 * was necessary, because sys_exit_np() was called, because some Linux
523 * subsystem determined so, or even (in the worst case) because there is a bug
524 * hidden somewhere. Thus, we must take extreme care to determine what the
525 * current state is.
526 *
527 * The CPU could currently be scheduling a task (or not), be linked (or not).
528 *
529 * The following assertions for the scheduled task could hold:
530 *
531 * - !is_running(scheduled) // the job blocks
532 * - scheduled->timeslice == 0 // the job completed (forcefully)
533 * - is_completed() // the job completed (by syscall)
534 * - linked != scheduled // we need to reschedule (for any reason)
535 * - is_np(scheduled) // rescheduling must be delayed,
536 * sys_exit_np must be requested
537 *
538 * Any of these can occur together.
539 */
540static struct task_struct* cflsplit_schedule(struct task_struct * prev)
541{
542 cpu_entry_t* entry = &__get_cpu_var(cflsplit_cpu_entries);
543 cflsplit_domain_t *cluster = entry->cluster;
544 int out_of_time, sleep, preempt, np, exists, blocks, needs_move;
545 struct task_struct* next = NULL;
546
547#ifdef CONFIG_RELEASE_MASTER
548 /* Bail out early if we are the release master.
549 * The release master never schedules any real-time tasks.
550 */
551 if (unlikely(cluster->domain.release_master == entry->cpu)) {
552 sched_state_task_picked();
553 return NULL;
554 }
555#endif
556
557 raw_spin_lock(&cluster->cluster_lock);
558 clear_will_schedule();
559
560 /* sanity checking */
561 BUG_ON(entry->scheduled && entry->scheduled != prev);
562 BUG_ON(entry->scheduled && !is_realtime(prev));
563 BUG_ON(is_realtime(prev) && !entry->scheduled);
564
565 /* (0) Determine state */
566 exists = entry->scheduled != NULL;
567 blocks = exists && !is_running(entry->scheduled);
568 out_of_time = exists &&
569 budget_enforced(entry->scheduled) &&
570 budget_exhausted(entry->scheduled);
571 needs_move = exists && needs_deadline_move(entry->scheduled);
572 np = exists && is_np(entry->scheduled);
573 sleep = exists && is_completed(entry->scheduled);
574 preempt = entry->scheduled != entry->linked;
575
576#ifdef WANT_ALL_SCHED_EVENTS
577 TRACE_TASK(prev, "invoked cflsplit_schedule.\n");
578#endif
579
580 if (exists)
581 TRACE_TASK(prev,
582 "blocks:%d out_of_time:%d needs_move: %d np:%d"
583 " sleep:%d preempt:%d state:%d sig:%d\n",
584 blocks, out_of_time, needs_move, np, sleep, preempt,
585 prev->state, signal_pending(prev));
586 if (entry->linked && preempt)
587 TRACE_TASK(prev, "will be preempted by %s/%d\n",
588 entry->linked->comm, entry->linked->pid);
589
590
591 /* If a task blocks we have no choice but to reschedule.
592 */
593 if (blocks)
594 unlink(entry->scheduled);
595
596 /* Request a sys_exit_np() call if we would like to preempt but cannot.
597 * We need to make sure to update the link structure anyway in case
598 * that we are still linked. Multiple calls to request_exit_np() don't
599 * hurt.
600 *
601 * Job deadline moves handled similarly
602 */
603 if (np && (out_of_time || preempt || sleep)) {
604 unlink(entry->scheduled);
605 request_exit_np(entry->scheduled);
606 }
607 else if (np && needs_move) {
608 request_exit_np(entry->scheduled);
609 }
610
611 /* Any task that is preemptable and either exhausts its execution
612 * budget or wants to sleep completes. We may have to reschedule after
613 * this. Don't do a job completion if we block (can't have timers running
614 * for blocked jobs). Preemptions go first for the same reason.
615 */
616 if (!np && (out_of_time || sleep) && !blocks)
617 job_completion(entry->scheduled, !sleep);
618 else if (!np && needs_move && !blocks) {
619 move_deadline(entry->scheduled);
620 }
621
622 /* Link pending task if we became unlinked.
623 */
624 if (!entry->linked)
625 link_task_to_cpu(__take_ready(&cluster->domain), entry);
626
627 /* The final scheduling decision. Do we need to switch for some reason?
628 * If linked is different from scheduled, then select linked as next.
629 */
630 if ((!np || blocks) &&
631 entry->linked != entry->scheduled) {
632 /* Schedule a linked job? */
633 if (entry->linked) {
634 entry->linked->rt_param.scheduled_on = entry->cpu;
635 next = entry->linked;
636 }
637 if (entry->scheduled) {
638 /* not gonna be scheduled soon */
639 entry->scheduled->rt_param.scheduled_on = NO_CPU;
640 TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
641 }
642 } else
643 /* Only override Linux scheduler if we have a real-time task
644 * scheduled that needs to continue.
645 */
646 if (exists)
647 next = prev;
648
649 sched_state_task_picked();
650 raw_spin_unlock(&cluster->cluster_lock);
651
652 if (next) {
653 arm_split_timer(entry, next);
654 }
655 else if (entry->timer_armed) {
656 cancel_split_timer(entry);
657 }
658
659#ifdef WANT_ALL_SCHED_EVENTS
660 TRACE("cflsplit_lock released, next=0x%p\n", next);
661
662 if (next)
663 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
664 else if (exists && !next)
665 TRACE("becomes idle at %llu.\n", litmus_clock());
666#endif
667
668
669 return next;
670}
671
672
673/* _finish_switch - we just finished the switch away from prev
674 */
675static void cflsplit_finish_switch(struct task_struct *prev)
676{
677 cpu_entry_t* entry = &__get_cpu_var(cflsplit_cpu_entries);
678
679 entry->scheduled = is_realtime(current) ? current : NULL;
680#ifdef WANT_ALL_SCHED_EVENTS
681 TRACE_TASK(prev, "switched away from\n");
682#endif
683}
684
685
686static void cflsplit_release_at(struct task_struct *t, lt_t start)
687{
688 release_at(t, start);
689 t->rt_param.job_params.subjob_deadline = get_proper_deadline(t);
690}
691
692
693/* Prepare a task for running in RT mode
694 */
695static void cflsplit_task_new(struct task_struct * t, int on_rq, int is_scheduled)
696{
697 unsigned long flags;
698 cpu_entry_t* entry;
699 cflsplit_domain_t* cluster;
700
701	TRACE("C-FL-split: task new %d\n", t->pid);
702
703 /* the cluster doesn't change even if t is scheduled */
704 cluster = task_cpu_cluster(t);
705
706 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
707
708 /* setup job params */
709 cflsplit_release_at(t, litmus_clock());
710
711 if (is_scheduled) {
712 entry = &per_cpu(cflsplit_cpu_entries, task_cpu(t));
713 BUG_ON(entry->scheduled);
714
715#ifdef CONFIG_RELEASE_MASTER
716 if (entry->cpu != cluster->domain.release_master) {
717#endif
718 entry->scheduled = t;
719 tsk_rt(t)->scheduled_on = task_cpu(t);
720#ifdef CONFIG_RELEASE_MASTER
721 } else {
722 /* do not schedule on release master */
723 preempt(entry); /* force resched */
724 tsk_rt(t)->scheduled_on = NO_CPU;
725 }
726#endif
727 } else {
728 t->rt_param.scheduled_on = NO_CPU;
729 }
730 t->rt_param.linked_on = NO_CPU;
731
732 if (is_running(t))
733 cflsplit_job_arrival(t);
734 raw_spin_unlock_irqrestore(&(cluster->cluster_lock), flags);
735}
736
737static void cflsplit_task_wake_up(struct task_struct *task)
738{
739 unsigned long flags;
740 lt_t now;
741 cflsplit_domain_t *cluster;
742
743 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
744
745 cluster = task_cpu_cluster(task);
746
747 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
748 now = litmus_clock();
749 if (is_sporadic(task) && is_tardy(task, now)) {
750 /* new sporadic release */
751 cflsplit_release_at(task, now);
752 sched_trace_task_release(task);
753 }
754 cflsplit_job_arrival(task);
755 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
756}
757
758static void cflsplit_task_block(struct task_struct *t)
759{
760 unsigned long flags;
761 cflsplit_domain_t *cluster;
762
763 TRACE_TASK(t, "block at %llu\n", litmus_clock());
764
765 cluster = task_cpu_cluster(t);
766
767 /* unlink if necessary */
768 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
769 unlink(t);
770 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
771
772 BUG_ON(!is_realtime(t));
773}
774
775
776static void cflsplit_task_exit(struct task_struct * t)
777{
778 unsigned long flags;
779 cflsplit_domain_t *cluster = task_cpu_cluster(t);
780
781 /* unlink if necessary */
782 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
783 unlink(t);
784 if (tsk_rt(t)->scheduled_on != NO_CPU) {
785 cpu_entry_t *cpu;
786 cpu = &per_cpu(cflsplit_cpu_entries, tsk_rt(t)->scheduled_on);
787 cpu->scheduled = NULL;
788 tsk_rt(t)->scheduled_on = NO_CPU;
789 }
790 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
791
792 BUG_ON(!is_realtime(t));
793 TRACE_TASK(t, "RIP\n");
794}
795
796static long cflsplit_admit_task(struct task_struct* tsk)
797{
798 return (remote_cluster(task_cpu(tsk)) == task_cpu_cluster(tsk)) ?
799 0 : -EINVAL;
800}
801
802/* total number of clusters */
803static int num_clusters;
804/* we do not support clusters of different sizes */
805static unsigned int cluster_size;
806
807#ifdef VERBOSE_INIT
808static void print_cluster_topology(cpumask_var_t mask, int cpu)
809{
810 int chk;
811 char buf[255];
812
813 chk = cpulist_scnprintf(buf, 254, mask);
814 buf[chk] = '\0';
815 printk(KERN_INFO "CPU = %d, shared cpu(s) = %s\n", cpu, buf);
816
817}
818#endif
819
820static int clusters_allocated = 0;
821
822static void cleanup_cflsplit(void)
823{
824 int i;
825
826 if (clusters_allocated) {
827 for (i = 0; i < num_clusters; i++) {
828 kfree(cflsplit[i].cpus);
829 kfree(cflsplit[i].heap_node);
830 free_cpumask_var(cflsplit[i].cpu_map);
831 }
832
833 kfree(cflsplit);
834 }
835}
836
837static long cflsplit_activate_plugin(void)
838{
839 int i, j, cpu, ccpu, cpu_count;
840 cpu_entry_t *entry;
841
842 cpumask_var_t mask;
843 int chk = 0;
844
845 /* de-allocate old clusters, if any */
846 cleanup_cflsplit();
847
848 printk(KERN_INFO "C-FL-split: Activate Plugin, cluster configuration = %d\n",
849 cluster_config);
850
851 /* need to get cluster_size first */
852 if(!zalloc_cpumask_var(&mask, GFP_ATOMIC))
853 return -ENOMEM;
854
855 if (unlikely(cluster_config == GLOBAL_CLUSTER)) {
856 cluster_size = num_online_cpus();
857 } else {
858 chk = get_shared_cpu_map(mask, 0, cluster_config);
859 if (chk) {
860 /* if chk != 0 then it is the max allowed index */
861 printk(KERN_INFO "C-FL-split: Cluster configuration = %d "
862 "is not supported on this hardware.\n",
863 cluster_config);
864 /* User should notice that the configuration failed, so
865 * let's bail out. */
866 return -EINVAL;
867 }
868
869 cluster_size = cpumask_weight(mask);
870 }
871
872 if ((num_online_cpus() % cluster_size) != 0) {
873 /* this can't be right, some cpus are left out */
874 printk(KERN_ERR "C-FL-split: Trying to group %d cpus in %d!\n",
875 num_online_cpus(), cluster_size);
876 return -1;
877 }
878
879 num_clusters = num_online_cpus() / cluster_size;
880 printk(KERN_INFO "C-FL-split: %d cluster(s) of size = %d\n",
881 num_clusters, cluster_size);
882
883 /* initialize clusters */
884 cflsplit = kmalloc(num_clusters * sizeof(cflsplit_domain_t), GFP_ATOMIC);
885 for (i = 0; i < num_clusters; i++) {
886
887 cflsplit[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t),
888 GFP_ATOMIC);
889 cflsplit[i].heap_node = kmalloc(
890 cluster_size * sizeof(struct bheap_node),
891 GFP_ATOMIC);
892 bheap_init(&(cflsplit[i].cpu_heap));
893 edf_split_domain_init(&(cflsplit[i].domain), NULL,
894 cflsplit_release_jobs);
895
896 if(!zalloc_cpumask_var(&cflsplit[i].cpu_map, GFP_ATOMIC))
897 return -ENOMEM;
898#ifdef CONFIG_RELEASE_MASTER
899 cflsplit[i].domain.release_master = atomic_read(&release_master_cpu);
900#endif
901 }
902
903	/* cycle through clusters and add cpus to them */
904 for (i = 0; i < num_clusters; i++) {
905
906 for_each_online_cpu(cpu) {
907 /* check if the cpu is already in a cluster */
908 for (j = 0; j < num_clusters; j++)
909 if (cpumask_test_cpu(cpu, cflsplit[j].cpu_map))
910 break;
911 /* if it is in a cluster go to next cpu */
912 if (j < num_clusters &&
913 cpumask_test_cpu(cpu, cflsplit[j].cpu_map))
914 continue;
915
916 /* this cpu isn't in any cluster */
917 /* get the shared cpus */
918 if (unlikely(cluster_config == GLOBAL_CLUSTER))
919 cpumask_copy(mask, cpu_online_mask);
920 else
921 get_shared_cpu_map(mask, cpu, cluster_config);
922
923 cpumask_copy(cflsplit[i].cpu_map, mask);
924#ifdef VERBOSE_INIT
925 print_cluster_topology(mask, cpu);
926#endif
927 /* add cpus to current cluster and init cpu_entry_t */
928 cpu_count = 0;
929 for_each_cpu(ccpu, cflsplit[i].cpu_map) {
930
931 entry = &per_cpu(cflsplit_cpu_entries, ccpu);
932 cflsplit[i].cpus[cpu_count] = entry;
933 atomic_set(&entry->will_schedule, 0);
934 entry->cpu = ccpu;
935 entry->cluster = &cflsplit[i];
936 entry->hn = &(cflsplit[i].heap_node[cpu_count]);
937 hrtimer_init(&entry->split_timer,
938 CLOCK_MONOTONIC,
939 HRTIMER_MODE_ABS);
940 entry->split_timer.function = on_split_timeout;
941 bheap_node_init(&entry->hn, entry);
942
943 cpu_count++;
944
945 entry->linked = NULL;
946 entry->scheduled = NULL;
947#ifdef CONFIG_RELEASE_MASTER
948 /* only add CPUs that should schedule jobs */
949 if (entry->cpu != entry->cluster->domain.release_master)
950#endif
951 update_cpu_position(entry);
952 }
953 /* done with this cluster */
954 break;
955 }
956 }
957
958 free_cpumask_var(mask);
959 clusters_allocated = 1;
960 return 0;
961}
962
963/* Plugin object */
964static struct sched_plugin cflsplit_plugin __cacheline_aligned_in_smp = {
965 .plugin_name = "C-FL-split",
966 .finish_switch = cflsplit_finish_switch,
967 .tick = cflsplit_tick,
968 .task_new = cflsplit_task_new,
969 .complete_job = complete_job,
970 .task_exit = cflsplit_task_exit,
971 .schedule = cflsplit_schedule,
972 .release_at = cflsplit_release_at,
973 .task_wake_up = cflsplit_task_wake_up,
974 .task_block = cflsplit_task_block,
975 .admit_task = cflsplit_admit_task,
976 .activate_plugin = cflsplit_activate_plugin,
977};
978
979static struct proc_dir_entry *cluster_file = NULL, *cflsplit_dir = NULL;
980
981static int __init init_cflsplit(void)
982{
983 int err, fs;
984
985 err = register_sched_plugin(&cflsplit_plugin);
986 if (!err) {
987 fs = make_plugin_proc_dir(&cflsplit_plugin, &cflsplit_dir);
988 if (!fs)
989 cluster_file = create_cluster_file(cflsplit_dir, &cluster_config);
990 else
991 printk(KERN_ERR "Could not allocate C-FL-split procfs dir.\n");
992 }
993 return err;
994}
995
996static void clean_cflsplit(void)
997{
998 cleanup_cflsplit();
999 if (cluster_file)
1000 remove_proc_entry("cluster", cflsplit_dir);
1001 if (cflsplit_dir)
1002 remove_plugin_proc_dir(&cflsplit_plugin);
1003}
1004
1005module_init(init_cflsplit);
1006module_exit(clean_cflsplit);
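
A note on runtime configuration (an assumption based on the standard LITMUS^RT proc layout, not something this patch documents): as with C-EDF, the cluster granularity is chosen through the "cluster" file that create_cluster_file() publishes under the plugin's proc directory, typically /proc/litmus/plugins/C-FL-split/cluster. Writing one of the cache levels listed in the header comment of sched_cfl_split.c (L1, L2, L3, or ALL) to that file before the plugin is activated selects how CPUs are grouped; the default GLOBAL_CLUSTER setting keeps all online CPUs in a single cluster, i.e., plain G-FL.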