author     Andrea Bastoni <bastoni@cs.unc.edu>   2010-04-13 15:26:03 -0400
committer  Andrea Bastoni <bastoni@cs.unc.edu>   2010-04-13 15:29:34 -0400
commit     175164a7f9d8597b6ade9e4fa02effcad6fc6bbf (patch)
tree       99bca9cd1061cf2adf9b912ebf39b8e432b6cd37
parent     ad92e346f66397c431b8856fb1eb15be29415b04 (diff)
Copy and rename gsn-edf -> cedf
-rw-r--r--  litmus/sched_cedf.c  673
1 file changed, 387 insertions, 286 deletions
diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c
index d0767ce9e178..0dace2dc871f 100644
--- a/litmus/sched_cedf.c
+++ b/litmus/sched_cedf.c
@@ -1,9 +1,7 @@
1/* 1/*
2 * kernel/sched_cedf.c 2 * litmus/sched_cedf.c
3 * 3 *
4 * Implementation of the Clustered EDF (C-EDF) scheduling algorithm. 4 * Implementation of the C-EDF scheduling algorithm.
5 * Linking is included so that support for synchronization (e.g., through
6 * the implementation of a "CSN-EDF" algorithm) can be added later if desired.
7 * 5 *
8 * This version uses the simple approach and serializes all scheduling 6 * This version uses the simple approach and serializes all scheduling
9 * decisions by the use of a queue lock. This is probably not the 7 * decisions by the use of a queue lock. This is probably not the
@@ -13,19 +11,23 @@
13#include <linux/spinlock.h> 11#include <linux/spinlock.h>
14#include <linux/percpu.h> 12#include <linux/percpu.h>
15#include <linux/sched.h> 13#include <linux/sched.h>
16#include <linux/list.h>
17 14
18#include <litmus/litmus.h> 15#include <litmus/litmus.h>
19#include <litmus/jobs.h> 16#include <litmus/jobs.h>
20#include <litmus/sched_plugin.h> 17#include <litmus/sched_plugin.h>
21#include <litmus/edf_common.h> 18#include <litmus/edf_common.h>
22#include <litmus/sched_trace.h> 19#include <litmus/sched_trace.h>
20
23#include <litmus/bheap.h> 21#include <litmus/bheap.h>
24 22
25#include <linux/module.h> 23#include <linux/module.h>
26 24
27/* Overview of C-EDF operations. 25/* Overview of C-EDF operations.
28 * 26 *
27 * For a detailed explanation of C-EDF have a look at the FMLP paper. This
28 * description only covers how the individual operations are implemented in
29 * LITMUS.
30 *
29 * link_task_to_cpu(T, cpu) - Low-level operation to update the linkage 31 * link_task_to_cpu(T, cpu) - Low-level operation to update the linkage
30 * structure (NOT the actually scheduled 32 * structure (NOT the actually scheduled
31 * task). If there is another linked task To 33 * task). If there is another linked task To
@@ -42,10 +44,9 @@
42 * unlink(T) - Unlink removes T from all scheduler data 44 * unlink(T) - Unlink removes T from all scheduler data
43 * structures. If it is linked to some CPU it 45 * structures. If it is linked to some CPU it
44 * will link NULL to that CPU. If it is 46 * will link NULL to that CPU. If it is
45 * currently queued in the cedf queue for 47 * currently queued in the cedf queue it will
46 * a partition, it will be removed from 48 * be removed from the rt_domain. It is safe to
47 * the rt_domain. It is safe to call 49 * call unlink(T) if T is not linked. T may not
48 * unlink(T) if T is not linked. T may not
49 * be NULL. 50 * be NULL.
50 * 51 *
51 * requeue(T) - Requeue will insert T into the appropriate 52 * requeue(T) - Requeue will insert T into the appropriate
@@ -56,11 +57,11 @@
56 * release queue. If T's release time is in the 57 * release queue. If T's release time is in the
57 * future, it will go into the release 58 * future, it will go into the release
58 * queue. That means that T's release time/job 59 * queue. That means that T's release time/job
 59 * no/etc. has to be updated before requeue(T) is 60 * no/etc. has to be updated before requeue(T) is
60 * called. It is not safe to call requeue(T) 61 * called. It is not safe to call requeue(T)
61 * when T is already queued. T may not be NULL. 62 * when T is already queued. T may not be NULL.
62 * 63 *
 63 * cedf_job_arrival(T) - This is the catch-all function when T enters 64 * cedf_job_arrival(T) - This is the catch-all function when T enters
64 * the system after either a suspension or at a 65 * the system after either a suspension or at a
65 * job release. It will queue T (which means it 66 * job release. It will queue T (which means it
66 * is not safe to call cedf_job_arrival(T) if 67 * is not safe to call cedf_job_arrival(T) if
@@ -71,7 +72,7 @@
71 * (either with an IPI or need_resched). It is 72 * (either with an IPI or need_resched). It is
72 * safe to call cedf_job_arrival(T) if T's 73 * safe to call cedf_job_arrival(T) if T's
73 * next job has not been actually released yet 74 * next job has not been actually released yet
 74 * (release time in the future). T will be put 75 * (release time in the future). T will be put
75 * on the release queue in that case. 76 * on the release queue in that case.
76 * 77 *
77 * job_completion(T) - Take care of everything that needs to be done 78 * job_completion(T) - Take care of everything that needs to be done
@@ -87,18 +88,19 @@
87 * __take_ready). 88 * __take_ready).
88 */ 89 */
89 90
91
90/* cpu_entry_t - maintain the linked and scheduled state 92/* cpu_entry_t - maintain the linked and scheduled state
91 */ 93 */
92typedef struct { 94typedef struct {
93 int cpu; 95 int cpu;
94 struct task_struct* linked; /* only RT tasks */ 96 struct task_struct* linked; /* only RT tasks */
95 struct task_struct* scheduled; /* only RT tasks */ 97 struct task_struct* scheduled; /* only RT tasks */
96 struct list_head list;
97 atomic_t will_schedule; /* prevent unneeded IPIs */ 98 atomic_t will_schedule; /* prevent unneeded IPIs */
99 struct bheap_node* hn;
98} cpu_entry_t; 100} cpu_entry_t;
99DEFINE_PER_CPU(cpu_entry_t, cedf_cpu_entries); 101DEFINE_PER_CPU(cpu_entry_t, cedf_cpu_entries);
100 102
101cpu_entry_t* *cedf_cpu_entries_array; 103cpu_entry_t* cedf_cpus[NR_CPUS];
102 104
103#define set_will_schedule() \ 105#define set_will_schedule() \
104 (atomic_set(&__get_cpu_var(cedf_cpu_entries).will_schedule, 1)) 106 (atomic_set(&__get_cpu_var(cedf_cpu_entries).will_schedule, 1))
@@ -107,75 +109,50 @@ cpu_entry_t* *cedf_cpu_entries_array;
107#define test_will_schedule(cpu) \ 109#define test_will_schedule(cpu) \
108 (atomic_read(&per_cpu(cedf_cpu_entries, cpu).will_schedule)) 110 (atomic_read(&per_cpu(cedf_cpu_entries, cpu).will_schedule))
109 111
110/* Cluster size -- currently four. This is a variable to allow for
111 * the possibility of changing the cluster size online in the future.
112 */
113int cluster_size = 4;
114
115int do_cleanup = 1;
116
117typedef struct {
118 rt_domain_t domain;
119 int first_cpu;
120 int last_cpu;
121 112
122 /* the cpus queue themselves according to priority in here */ 113/* the cpus queue themselves according to priority in here */
123 struct list_head cedf_cpu_queue; 114static struct bheap_node cedf_heap_node[NR_CPUS];
115static struct bheap cedf_cpu_heap;
124 116
125 /* per-partition spinlock: protects the domain and 117static rt_domain_t cedf;
126 * serializes scheduling decisions 118#define cedf_lock (cedf.ready_lock)
127 */
128#define slock domain.ready_lock
129} cedf_domain_t;
130
131DEFINE_PER_CPU(cedf_domain_t*, cedf_domains) = NULL;
132
133cedf_domain_t* *cedf_domains_array;
134 119
135 120
136/* These are defined similarly to partitioning, except that a 121/* Uncomment this if you want to see all scheduling decisions in the
137 * tasks partition is any cpu of the cluster to which it 122 * TRACE() log.
138 * is assigned, typically the lowest-numbered cpu. 123#define WANT_ALL_SCHED_EVENTS
139 */ 124 */
140#define local_edf (&__get_cpu_var(cedf_domains)->domain) 125
141#define local_cedf __get_cpu_var(cedf_domains) 126static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b)
142#define remote_edf(cpu) (&per_cpu(cedf_domains, cpu)->domain) 127{
143#define remote_cedf(cpu) per_cpu(cedf_domains, cpu) 128 cpu_entry_t *a, *b;
144#define task_edf(task) remote_edf(get_partition(task)) 129 a = _a->value;
145#define task_cedf(task) remote_cedf(get_partition(task)) 130 b = _b->value;
131 /* Note that a and b are inverted: we want the lowest-priority CPU at
132 * the top of the heap.
133 */
134 return edf_higher_prio(b->linked, a->linked);
135}
146 136
147/* update_cpu_position - Move the cpu entry to the correct place to maintain 137/* update_cpu_position - Move the cpu entry to the correct place to maintain
148 * order in the cpu queue. Caller must hold cedf lock. 138 * order in the cpu queue. Caller must hold cedf lock.
149 *
150 * This really should be a heap.
151 */ 139 */
152static void update_cpu_position(cpu_entry_t *entry) 140static void update_cpu_position(cpu_entry_t *entry)
153{ 141{
154 cpu_entry_t *other; 142 if (likely(bheap_node_in_heap(entry->hn)))
155 struct list_head *cedf_cpu_queue = 143 bheap_delete(cpu_lower_prio, &cedf_cpu_heap, entry->hn);
156 &(remote_cedf(entry->cpu))->cedf_cpu_queue; 144 bheap_insert(cpu_lower_prio, &cedf_cpu_heap, entry->hn);
157 struct list_head *pos; 145}
158
159 BUG_ON(!cedf_cpu_queue);
160 146
161 if (likely(in_list(&entry->list))) 147/* caller must hold cedf lock */
162 list_del(&entry->list); 148static cpu_entry_t* lowest_prio_cpu(void)
163 /* if we do not execute real-time jobs we just move 149{
164 * to the end of the queue 150 struct bheap_node* hn;
165 */ 151 hn = bheap_peek(cpu_lower_prio, &cedf_cpu_heap);
166 if (entry->linked) { 152 return hn->value;
167 list_for_each(pos, cedf_cpu_queue) {
168 other = list_entry(pos, cpu_entry_t, list);
169 if (edf_higher_prio(entry->linked, other->linked)) {
170 __list_add(&entry->list, pos->prev, pos);
171 return;
172 }
173 }
174 }
175 /* if we get this far we have the lowest priority job */
176 list_add_tail(&entry->list, cedf_cpu_queue);
177} 153}
178 154
155
179/* link_task_to_cpu - Update the link of a CPU. 156/* link_task_to_cpu - Update the link of a CPU.
180 * Handles the case where the to-be-linked task is already 157 * Handles the case where the to-be-linked task is already
181 * scheduled on a different CPU. 158 * scheduled on a different CPU.
@@ -189,9 +166,6 @@ static noinline void link_task_to_cpu(struct task_struct* linked,
189 166
190 BUG_ON(linked && !is_realtime(linked)); 167 BUG_ON(linked && !is_realtime(linked));
191 168
192 /* Cannot link task to a CPU that doesn't belong to its partition... */
193 BUG_ON(linked && remote_cedf(entry->cpu) != task_cedf(linked));
194
195 /* Currently linked task is set to be unlinked. */ 169 /* Currently linked task is set to be unlinked. */
196 if (entry->linked) { 170 if (entry->linked) {
197 entry->linked->rt_param.linked_on = NO_CPU; 171 entry->linked->rt_param.linked_on = NO_CPU;
@@ -213,6 +187,9 @@ static noinline void link_task_to_cpu(struct task_struct* linked,
213 * the caller to get things right. 187 * the caller to get things right.
214 */ 188 */
215 if (entry != sched) { 189 if (entry != sched) {
190 TRACE_TASK(linked,
191 "already scheduled on %d, updating link.\n",
192 sched->cpu);
216 tmp = sched->linked; 193 tmp = sched->linked;
217 linked->rt_param.linked_on = sched->cpu; 194 linked->rt_param.linked_on = sched->cpu;
218 sched->linked = linked; 195 sched->linked = linked;
@@ -224,13 +201,12 @@ static noinline void link_task_to_cpu(struct task_struct* linked,
224 linked->rt_param.linked_on = entry->cpu; 201 linked->rt_param.linked_on = entry->cpu;
225 } 202 }
226 entry->linked = linked; 203 entry->linked = linked;
227 204#ifdef WANT_ALL_SCHED_EVENTS
228 if (entry->linked) 205 if (linked)
229 TRACE_TASK(entry->linked, "linked to CPU %d, state:%d\n", 206 TRACE_TASK(linked, "linked to %d.\n", entry->cpu);
230 entry->cpu, entry->linked->state);
231 else 207 else
232 TRACE("NULL linked to CPU %d\n", entry->cpu); 208 TRACE("NULL linked to %d.\n", entry->cpu);
233 209#endif
234 update_cpu_position(entry); 210 update_cpu_position(entry);
235} 211}
236 212
@@ -259,94 +235,97 @@ static noinline void unlink(struct task_struct* t)
259 * queue. We must remove it from the list in this 235 * queue. We must remove it from the list in this
260 * case. 236 * case.
261 */ 237 */
262 remove(task_edf(t), t); 238 remove(&cedf, t);
263 } 239 }
264} 240}
265 241
266 242
267/* preempt - force a CPU to reschedule 243/* preempt - force a CPU to reschedule
268 */ 244 */
269static noinline void preempt(cpu_entry_t *entry) 245static void preempt(cpu_entry_t *entry)
270{ 246{
271 preempt_if_preemptable(entry->scheduled, entry->cpu); 247 preempt_if_preemptable(entry->scheduled, entry->cpu);
272} 248}
273 249
 274/* requeue - Put an unlinked task into c-edf domain. 250 * requeue - Put an unlinked task into cedf domain.
275 * Caller must hold cedf_lock. 251 * Caller must hold cedf_lock.
276 */ 252 */
277static noinline void requeue(struct task_struct* task) 253static noinline void requeue(struct task_struct* task)
278{ 254{
279 cedf_domain_t* cedf;
280 rt_domain_t* edf;
281
282 BUG_ON(!task); 255 BUG_ON(!task);
283 /* sanity check rt_list before insertion */ 256 /* sanity check before insertion */
284 BUG_ON(is_queued(task)); 257 BUG_ON(is_queued(task));
285 258
286 /* Get correct real-time domain. */
287 cedf = task_cedf(task);
288 edf = &cedf->domain;
289
290 if (is_released(task, litmus_clock())) 259 if (is_released(task, litmus_clock()))
291 __add_ready(edf, task); 260 __add_ready(&cedf, task);
292 else { 261 else {
293 /* it has got to wait */ 262 /* it has got to wait */
294 add_release(edf, task); 263 add_release(&cedf, task);
295 } 264 }
296} 265}
297 266
298static void check_for_preemptions(cedf_domain_t* cedf) 267/* check for any necessary preemptions */
268static void check_for_preemptions(void)
299{ 269{
300 cpu_entry_t *last;
301 struct task_struct *task; 270 struct task_struct *task;
302 struct list_head *cedf_cpu_queue; 271 cpu_entry_t* last;
303 cedf_cpu_queue = &cedf->cedf_cpu_queue;
304 272
305 for(last = list_entry(cedf_cpu_queue->prev, cpu_entry_t, list); 273 for(last = lowest_prio_cpu();
306 edf_preemption_needed(&cedf->domain, last->linked); 274 edf_preemption_needed(&cedf, last->linked);
307 last = list_entry(cedf_cpu_queue->prev, cpu_entry_t, list)) { 275 last = lowest_prio_cpu()) {
308 /* preemption necessary */ 276 /* preemption necessary */
309 task = __take_ready(&cedf->domain); 277 task = __take_ready(&cedf);
310 TRACE("check_for_preemptions: task %d linked to %d, state:%d\n", 278 TRACE("check_for_preemptions: attempting to link task %d to %d\n",
311 task->pid, last->cpu, task->state); 279 task->pid, last->cpu);
312 if (last->linked) 280 if (last->linked)
313 requeue(last->linked); 281 requeue(last->linked);
314 link_task_to_cpu(task, last); 282 link_task_to_cpu(task, last);
315 preempt(last); 283 preempt(last);
316 } 284 }
317
318} 285}
319 286
320/* cedf_job_arrival: task is either resumed or released */ 287/* cedf_job_arrival: task is either resumed or released */
321static noinline void cedf_job_arrival(struct task_struct* task) 288static noinline void cedf_job_arrival(struct task_struct* task)
322{ 289{
323 cedf_domain_t* cedf;
324 rt_domain_t* edf;
325
326 BUG_ON(!task); 290 BUG_ON(!task);
327 291
328 /* Get correct real-time domain. */
329 cedf = task_cedf(task);
330 edf = &cedf->domain;
331
332 /* first queue arriving job */
333 requeue(task); 292 requeue(task);
334 293 check_for_preemptions();
335 /* then check for any necessary preemptions */
336 check_for_preemptions(cedf);
337} 294}
338 295
339/* check for current job releases */
340static void cedf_release_jobs(rt_domain_t* rt, struct bheap* tasks) 296static void cedf_release_jobs(rt_domain_t* rt, struct bheap* tasks)
341{ 297{
342 cedf_domain_t* cedf = container_of(rt, cedf_domain_t, domain); 298 unsigned long flags;
343 unsigned long flags; 299
300 spin_lock_irqsave(&cedf_lock, flags);
344 301
345 spin_lock_irqsave(&cedf->slock, flags); 302 __merge_ready(rt, tasks);
303 check_for_preemptions();
346 304
347 __merge_ready(&cedf->domain, tasks); 305 spin_unlock_irqrestore(&cedf_lock, flags);
348 check_for_preemptions(cedf); 306}
349 spin_unlock_irqrestore(&cedf->slock, flags); 307
308/* caller holds cedf_lock */
309static noinline void job_completion(struct task_struct *t, int forced)
310{
311 BUG_ON(!t);
312
313 sched_trace_task_completion(t, forced);
314
315 TRACE_TASK(t, "job_completion().\n");
316
317 /* set flags */
318 set_rt_flags(t, RT_F_SLEEP);
319 /* prepare for next period */
320 prepare_for_next_period(t);
321 if (is_released(t, litmus_clock()))
322 sched_trace_task_release(t);
323 /* unlink */
324 unlink(t);
325 /* requeue
326 * But don't requeue a blocking task. */
327 if (is_running(t))
328 cedf_job_arrival(t);
350} 329}
351 330
352/* cedf_tick - this function is called for every local timer 331/* cedf_tick - this function is called for every local timer
@@ -357,8 +336,6 @@ static void cedf_release_jobs(rt_domain_t* rt, struct bheap* tasks)
357 */ 336 */
358static void cedf_tick(struct task_struct* t) 337static void cedf_tick(struct task_struct* t)
359{ 338{
360 BUG_ON(!t);
361
362 if (is_realtime(t) && budget_exhausted(t)) { 339 if (is_realtime(t) && budget_exhausted(t)) {
363 if (!is_np(t)) { 340 if (!is_np(t)) {
364 /* np tasks will be preempted when they become 341 /* np tasks will be preempted when they become
@@ -367,38 +344,17 @@ static void cedf_tick(struct task_struct* t)
367 set_tsk_need_resched(t); 344 set_tsk_need_resched(t);
368 set_will_schedule(); 345 set_will_schedule();
369 TRACE("cedf_scheduler_tick: " 346 TRACE("cedf_scheduler_tick: "
370 "%d is preemptable (state:%d) " 347 "%d is preemptable "
371 " => FORCE_RESCHED\n", t->pid, t->state); 348 " => FORCE_RESCHED\n", t->pid);
372 } else if(is_user_np(t)) { 349 } else if (is_user_np(t)) {
373 TRACE("cedf_scheduler_tick: " 350 TRACE("cedf_scheduler_tick: "
374 "%d is non-preemptable (state:%d), " 351 "%d is non-preemptable, "
375 "preemption delayed.\n", t->pid, t->state); 352 "preemption delayed.\n", t->pid);
376 request_exit_np(t); 353 request_exit_np(t);
377 } 354 }
378 } 355 }
379} 356}
380 357
381/* caller holds cedf_lock */
382static noinline void job_completion(struct task_struct *t, int forced)
383{
384 BUG_ON(!t);
385
386 sched_trace_task_completion(t, forced);
387
388 TRACE_TASK(t, "job_completion(). [state:%d]\n", t->state);
389
390 /* set flags */
391 set_rt_flags(t, RT_F_SLEEP);
392 /* prepare for next period */
393 prepare_for_next_period(t);
394 /* unlink */
395 unlink(t);
396 /* requeue
397 * But don't requeue a blocking task. */
398 if (is_running(t))
399 cedf_job_arrival(t);
400}
401
402/* Getting schedule() right is a bit tricky. schedule() may not make any 358/* Getting schedule() right is a bit tricky. schedule() may not make any
403 * assumptions on the state of the current task since it may be called for a 359 * assumptions on the state of the current task since it may be called for a
404 * number of reasons. The reasons include a scheduler_tick() determined that it 360 * number of reasons. The reasons include a scheduler_tick() determined that it
@@ -422,22 +378,17 @@ static noinline void job_completion(struct task_struct *t, int forced)
422 */ 378 */
423static struct task_struct* cedf_schedule(struct task_struct * prev) 379static struct task_struct* cedf_schedule(struct task_struct * prev)
424{ 380{
425 cedf_domain_t* cedf = local_cedf; 381 cpu_entry_t* entry = &__get_cpu_var(cedf_cpu_entries);
426 rt_domain_t* edf = &cedf->domain; 382 int out_of_time, sleep, preempt, np, exists, blocks;
427 cpu_entry_t* entry = &__get_cpu_var(cedf_cpu_entries); 383 struct task_struct* next = NULL;
428 int out_of_time, sleep, preempt, np, 384
429 exists, blocks; 385 /* Bail out early if we are the release master.
430 struct task_struct* next = NULL; 386 * The release master never schedules any real-time tasks.
431 387 */
432 BUG_ON(!prev); 388 if (cedf.release_master == entry->cpu)
433 BUG_ON(!cedf); 389 return NULL;
434 BUG_ON(!edf); 390
435 BUG_ON(!entry); 391 spin_lock(&cedf_lock);
436 BUG_ON(cedf != remote_cedf(entry->cpu));
437 BUG_ON(is_realtime(prev) && cedf != task_cedf(prev));
438
439 /* Will be released in finish_switch. */
440 spin_lock(&cedf->slock);
441 clear_will_schedule(); 392 clear_will_schedule();
442 393
443 /* sanity checking */ 394 /* sanity checking */
@@ -453,6 +404,21 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
453 sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP; 404 sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP;
454 preempt = entry->scheduled != entry->linked; 405 preempt = entry->scheduled != entry->linked;
455 406
407#ifdef WANT_ALL_SCHED_EVENTS
408 TRACE_TASK(prev, "invoked cedf_schedule.\n");
409#endif
410
411 if (exists)
412 TRACE_TASK(prev,
413 "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d "
414 "state:%d sig:%d\n",
415 blocks, out_of_time, np, sleep, preempt,
416 prev->state, signal_pending(prev));
417 if (entry->linked && preempt)
418 TRACE_TASK(prev, "will be preempted by %s/%d\n",
419 entry->linked->comm, entry->linked->pid);
420
421
456 /* If a task blocks we have no choice but to reschedule. 422 /* If a task blocks we have no choice but to reschedule.
457 */ 423 */
458 if (blocks) 424 if (blocks)
@@ -470,8 +436,8 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
470 436
471 /* Any task that is preemptable and either exhausts its execution 437 /* Any task that is preemptable and either exhausts its execution
472 * budget or wants to sleep completes. We may have to reschedule after 438 * budget or wants to sleep completes. We may have to reschedule after
473 * this. Don't do a job completion if blocks (can't have timers 439 * this. Don't do a job completion if we block (can't have timers running
 474 * running for blocked jobs). Preemptions go first for the same reason. 440 * for blocked jobs). Preemptions go first for the same reason.
475 */ 441 */
476 if (!np && (out_of_time || sleep) && !blocks && !preempt) 442 if (!np && (out_of_time || sleep) && !blocks && !preempt)
477 job_completion(entry->scheduled, !sleep); 443 job_completion(entry->scheduled, !sleep);
@@ -479,10 +445,10 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
479 /* Link pending task if we became unlinked. 445 /* Link pending task if we became unlinked.
480 */ 446 */
481 if (!entry->linked) 447 if (!entry->linked)
482 link_task_to_cpu(__take_ready(edf), entry); 448 link_task_to_cpu(__take_ready(&cedf), entry);
483 449
484 /* The final scheduling decision. Do we need to switch for some reason? 450 /* The final scheduling decision. Do we need to switch for some reason?
485 * If linked different from scheduled select linked as next. 451 * If linked is different from scheduled, then select linked as next.
486 */ 452 */
487 if ((!np || blocks) && 453 if ((!np || blocks) &&
488 entry->linked != entry->scheduled) { 454 entry->linked != entry->scheduled) {
@@ -491,76 +457,90 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
491 entry->linked->rt_param.scheduled_on = entry->cpu; 457 entry->linked->rt_param.scheduled_on = entry->cpu;
492 next = entry->linked; 458 next = entry->linked;
493 } 459 }
494 if (entry->scheduled) { 460 if (entry->scheduled) {
495 /* not gonna be scheduled soon */ 461 /* not gonna be scheduled soon */
496 entry->scheduled->rt_param.scheduled_on = NO_CPU; 462 entry->scheduled->rt_param.scheduled_on = NO_CPU;
497 TRACE_TASK(entry->scheduled, "cedf_schedule: scheduled_on = NO_CPU\n"); 463 TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
498 } 464 }
499 } else 465 } else
500 /* Only override Linux scheduler if we have real-time task 466 /* Only override Linux scheduler if we have a real-time task
501 * scheduled that needs to continue. 467 * scheduled that needs to continue.
502 */ 468 */
503 if (exists) 469 if (exists)
504 next = prev; 470 next = prev;
505 471
506 spin_unlock(&cedf->slock); 472 spin_unlock(&cedf_lock);
473
474#ifdef WANT_ALL_SCHED_EVENTS
475 TRACE("cedf_lock released, next=0x%p\n", next);
476
477 if (next)
478 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
479 else if (exists && !next)
480 TRACE("becomes idle at %llu.\n", litmus_clock());
481#endif
482
507 483
508 return next; 484 return next;
509} 485}
510 486
487
511/* _finish_switch - we just finished the switch away from prev 488/* _finish_switch - we just finished the switch away from prev
512 */ 489 */
513static void cedf_finish_switch(struct task_struct *prev) 490static void cedf_finish_switch(struct task_struct *prev)
514{ 491{
515 cpu_entry_t* entry = &__get_cpu_var(cedf_cpu_entries); 492 cpu_entry_t* entry = &__get_cpu_var(cedf_cpu_entries);
516
517 BUG_ON(!prev);
518 BUG_ON(!entry);
519 493
520 entry->scheduled = is_realtime(current) ? current : NULL; 494 entry->scheduled = is_realtime(current) ? current : NULL;
495#ifdef WANT_ALL_SCHED_EVENTS
496 TRACE_TASK(prev, "switched away from\n");
497#endif
521} 498}
522 499
500
523/* Prepare a task for running in RT mode 501/* Prepare a task for running in RT mode
524 */ 502 */
525static void cedf_task_new(struct task_struct *t, int on_rq, int running) 503static void cedf_task_new(struct task_struct * t, int on_rq, int running)
526{ 504{
527 unsigned long flags; 505 unsigned long flags;
528 cedf_domain_t* cedf = task_cedf(t);
529 cpu_entry_t* entry; 506 cpu_entry_t* entry;
530 507
 531 BUG_ON(!cedf); 508 TRACE("cedf: task new %d\n", t->pid);
509
510 spin_lock_irqsave(&cedf_lock, flags);
511
512 /* setup job params */
513 release_at(t, litmus_clock());
532 514
533 spin_lock_irqsave(&cedf->slock, flags);
534 if (running) { 515 if (running) {
535 entry = &per_cpu(cedf_cpu_entries, task_cpu(t)); 516 entry = &per_cpu(cedf_cpu_entries, task_cpu(t));
536 BUG_ON(!entry);
537 BUG_ON(entry->scheduled); 517 BUG_ON(entry->scheduled);
538 entry->scheduled = t;
539 t->rt_param.scheduled_on = task_cpu(t);
540 } else
541 t->rt_param.scheduled_on = NO_CPU;
542 t->rt_param.linked_on = NO_CPU;
543 518
544 /* setup job params */ 519 if (entry->cpu != cedf.release_master) {
545 release_at(t, litmus_clock()); 520 entry->scheduled = t;
521 tsk_rt(t)->scheduled_on = task_cpu(t);
522 } else {
523 /* do not schedule on release master */
524 preempt(entry); /* force resched */
525 tsk_rt(t)->scheduled_on = NO_CPU;
526 }
527 } else {
528 t->rt_param.scheduled_on = NO_CPU;
529 }
530 t->rt_param.linked_on = NO_CPU;
546 531
547 cedf_job_arrival(t); 532 cedf_job_arrival(t);
548 spin_unlock_irqrestore(&cedf->slock, flags); 533 spin_unlock_irqrestore(&cedf_lock, flags);
549} 534}
550 535
551
552static void cedf_task_wake_up(struct task_struct *task) 536static void cedf_task_wake_up(struct task_struct *task)
553{ 537{
554 unsigned long flags; 538 unsigned long flags;
555 cedf_domain_t* cedf; 539 lt_t now;
556 lt_t now;
557
558 BUG_ON(!task);
559 540
560 cedf = task_cedf(task); 541 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
561 BUG_ON(!cedf);
562 542
563 spin_lock_irqsave(&cedf->slock, flags); 543 spin_lock_irqsave(&cedf_lock, flags);
564 /* We need to take suspensions because of semaphores into 544 /* We need to take suspensions because of semaphores into
565 * account! If a job resumes after being suspended due to acquiring 545 * account! If a job resumes after being suspended due to acquiring
566 * a semaphore, it should never be treated as a new job release. 546 * a semaphore, it should never be treated as a new job release.
@@ -574,59 +554,234 @@ static void cedf_task_wake_up(struct task_struct *task)
574 release_at(task, now); 554 release_at(task, now);
575 sched_trace_task_release(task); 555 sched_trace_task_release(task);
576 } 556 }
577 else if (task->rt.time_slice) 557 else {
578 /* came back in time before deadline 558 if (task->rt.time_slice) {
579 */ 559 /* came back in time before deadline
580 set_rt_flags(task, RT_F_RUNNING); 560 */
561 set_rt_flags(task, RT_F_RUNNING);
562 }
563 }
581 } 564 }
582 cedf_job_arrival(task); 565 cedf_job_arrival(task);
583 spin_unlock_irqrestore(&cedf->slock, flags); 566 spin_unlock_irqrestore(&cedf_lock, flags);
584} 567}
585 568
586
587static void cedf_task_block(struct task_struct *t) 569static void cedf_task_block(struct task_struct *t)
588{ 570{
589 unsigned long flags; 571 unsigned long flags;
590 572
591 BUG_ON(!t); 573 TRACE_TASK(t, "block at %llu\n", litmus_clock());
592 574
593 /* unlink if necessary */ 575 /* unlink if necessary */
594 spin_lock_irqsave(&task_cedf(t)->slock, flags); 576 spin_lock_irqsave(&cedf_lock, flags);
595
596 t->rt_param.scheduled_on = NO_CPU;
597 unlink(t); 577 unlink(t);
598 578 spin_unlock_irqrestore(&cedf_lock, flags);
599 spin_unlock_irqrestore(&task_cedf(t)->slock, flags);
600 579
601 BUG_ON(!is_realtime(t)); 580 BUG_ON(!is_realtime(t));
602} 581}
603 582
583
604static void cedf_task_exit(struct task_struct * t) 584static void cedf_task_exit(struct task_struct * t)
605{ 585{
606 unsigned long flags; 586 unsigned long flags;
607 587
608 BUG_ON(!t);
609
610 /* unlink if necessary */ 588 /* unlink if necessary */
611 spin_lock_irqsave(&task_cedf(t)->slock, flags); 589 spin_lock_irqsave(&cedf_lock, flags);
612 unlink(t); 590 unlink(t);
613 if (tsk_rt(t)->scheduled_on != NO_CPU) { 591 if (tsk_rt(t)->scheduled_on != NO_CPU) {
614 cedf_cpu_entries_array[tsk_rt(t)->scheduled_on]-> 592 cedf_cpus[tsk_rt(t)->scheduled_on]->scheduled = NULL;
615 scheduled = NULL;
616 tsk_rt(t)->scheduled_on = NO_CPU; 593 tsk_rt(t)->scheduled_on = NO_CPU;
617 } 594 }
618 spin_unlock_irqrestore(&task_cedf(t)->slock, flags); 595 spin_unlock_irqrestore(&cedf_lock, flags);
619 596
620 BUG_ON(!is_realtime(t)); 597 BUG_ON(!is_realtime(t));
621 TRACE_TASK(t, "RIP\n"); 598 TRACE_TASK(t, "RIP\n");
622} 599}
623 600
601#ifdef CONFIG_FMLP
602
 603/* Update the queue position of a task that got its priority boosted via
604 * priority inheritance. */
605static void update_queue_position(struct task_struct *holder)
606{
 607 * We don't know whether holder is in the ready queue. It should be, but
608 * on a budget overrun it may already be in a release queue. Hence,
609 * calling unlink() is not possible since it assumes that the task is
610 * not in a release queue. However, we can safely check whether
611 * sem->holder is currently in a queue or scheduled after locking both
612 * the release and the ready queue lock. */
613
614 /* Assumption: caller holds cedf_lock */
615
616 int check_preempt = 0;
617
618 if (tsk_rt(holder)->linked_on != NO_CPU) {
619 TRACE_TASK(holder, "%s: linked on %d\n",
620 __FUNCTION__, tsk_rt(holder)->linked_on);
621 /* Holder is scheduled; need to re-order CPUs.
622 * We can't use heap_decrease() here since
623 * the cpu_heap is ordered in reverse direction, so
624 * it is actually an increase. */
625 bheap_delete(cpu_lower_prio, &cedf_cpu_heap,
626 cedf_cpus[tsk_rt(holder)->linked_on]->hn);
627 bheap_insert(cpu_lower_prio, &cedf_cpu_heap,
628 cedf_cpus[tsk_rt(holder)->linked_on]->hn);
629 } else {
630 /* holder may be queued: first stop queue changes */
631 spin_lock(&cedf.release_lock);
632 if (is_queued(holder)) {
633 TRACE_TASK(holder, "%s: is queued\n",
634 __FUNCTION__);
635 /* We need to update the position
636 * of holder in some heap. Note that this
637 * may be a release heap. */
638 check_preempt =
639 !bheap_decrease(edf_ready_order,
640 tsk_rt(holder)->heap_node);
641 } else {
642 /* Nothing to do: if it is not queued and not linked
643 * then it is currently being moved by other code
644 * (e.g., a timer interrupt handler) that will use the
645 * correct priority when enqueuing the task. */
646 TRACE_TASK(holder, "%s: is NOT queued => Done.\n",
647 __FUNCTION__);
648 }
649 spin_unlock(&cedf.release_lock);
650
651 /* If holder was enqueued in a release heap, then the following
652 * preemption check is pointless, but we can't easily detect
653 * that case. If you want to fix this, then consider that
654 * simply adding a state flag requires O(n) time to update when
655 * releasing n tasks, which conflicts with the goal to have
656 * O(log n) merges. */
657 if (check_preempt) {
658 /* heap_decrease() hit the top level of the heap: make
659 * sure preemption checks get the right task, not the
660 * potentially stale cache. */
661 bheap_uncache_min(edf_ready_order,
662 &cedf.ready_queue);
663 check_for_preemptions();
664 }
665 }
666}
667
668static long cedf_pi_block(struct pi_semaphore *sem,
669 struct task_struct *new_waiter)
670{
671 /* This callback has to handle the situation where a new waiter is
672 * added to the wait queue of the semaphore.
673 *
 674 * We must check if it has a higher priority than the currently
675 * highest-priority task, and then potentially reschedule.
676 */
677
678 BUG_ON(!new_waiter);
679
680 if (edf_higher_prio(new_waiter, sem->hp.task)) {
681 TRACE_TASK(new_waiter, " boosts priority via %p\n", sem);
682 /* called with IRQs disabled */
683 spin_lock(&cedf_lock);
684 /* store new highest-priority task */
685 sem->hp.task = new_waiter;
686 if (sem->holder) {
687 TRACE_TASK(sem->holder,
688 " holds %p and will inherit from %s/%d\n",
689 sem,
690 new_waiter->comm, new_waiter->pid);
691 /* let holder inherit */
692 sem->holder->rt_param.inh_task = new_waiter;
693 update_queue_position(sem->holder);
694 }
695 spin_unlock(&cedf_lock);
696 }
697
698 return 0;
699}
700
701static long cedf_inherit_priority(struct pi_semaphore *sem,
702 struct task_struct *new_owner)
703{
704 /* We don't need to acquire the cedf_lock since at the time of this
705 * call new_owner isn't actually scheduled yet (it's still sleeping)
706 * and since the calling function already holds sem->wait.lock, which
707 * prevents concurrent sem->hp.task changes.
708 */
709
710 if (sem->hp.task && sem->hp.task != new_owner) {
711 new_owner->rt_param.inh_task = sem->hp.task;
712 TRACE_TASK(new_owner, "inherited priority from %s/%d\n",
713 sem->hp.task->comm, sem->hp.task->pid);
714 } else
715 TRACE_TASK(new_owner,
716 "cannot inherit priority, "
717 "no higher priority job waits.\n");
718 return 0;
719}
720
721/* This function is called on a semaphore release, and assumes that
722 * the current task is also the semaphore holder.
723 */
724static long cedf_return_priority(struct pi_semaphore *sem)
725{
726 struct task_struct* t = current;
727 int ret = 0;
728
729 /* Find new highest-priority semaphore task
730 * if holder task is the current hp.task.
731 *
732 * Calling function holds sem->wait.lock.
733 */
734 if (t == sem->hp.task)
735 edf_set_hp_task(sem);
736
737 TRACE_CUR("cedf_return_priority for lock %p\n", sem);
738
739 if (t->rt_param.inh_task) {
740 /* interrupts already disabled by PI code */
741 spin_lock(&cedf_lock);
742
743 /* Reset inh_task to NULL. */
744 t->rt_param.inh_task = NULL;
745
746 /* Check if rescheduling is necessary */
747 unlink(t);
748 cedf_job_arrival(t);
749 spin_unlock(&cedf_lock);
750 }
751
752 return ret;
753}
754
755#endif
756
624static long cedf_admit_task(struct task_struct* tsk) 757static long cedf_admit_task(struct task_struct* tsk)
625{ 758{
626 return (task_cpu(tsk) >= task_cedf(tsk)->first_cpu && 759 return 0;
627 task_cpu(tsk) <= task_cedf(tsk)->last_cpu) ? 0 : -EINVAL;
628} 760}
629 761
762static long cedf_activate_plugin(void)
763{
764 int cpu;
765 cpu_entry_t *entry;
766
767 bheap_init(&cedf_cpu_heap);
768 cedf.release_master = atomic_read(&release_master_cpu);
769
770 for_each_online_cpu(cpu) {
771 entry = &per_cpu(cedf_cpu_entries, cpu);
772 bheap_node_init(&entry->hn, entry);
773 atomic_set(&entry->will_schedule, 0);
774 entry->linked = NULL;
775 entry->scheduled = NULL;
776 if (cpu != cedf.release_master) {
777 TRACE("C-EDF: Initializing CPU #%d.\n", cpu);
778 update_cpu_position(entry);
779 } else {
780 TRACE("C-EDF: CPU %d is release master.\n", cpu);
781 }
782 }
783 return 0;
784}
630 785
631/* Plugin object */ 786/* Plugin object */
632static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = { 787static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = {
@@ -639,89 +794,35 @@ static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = {
639 .schedule = cedf_schedule, 794 .schedule = cedf_schedule,
640 .task_wake_up = cedf_task_wake_up, 795 .task_wake_up = cedf_task_wake_up,
641 .task_block = cedf_task_block, 796 .task_block = cedf_task_block,
642 .admit_task = cedf_admit_task 797#ifdef CONFIG_FMLP
798 .fmlp_active = 1,
799 .pi_block = cedf_pi_block,
800 .inherit_priority = cedf_inherit_priority,
801 .return_priority = cedf_return_priority,
802#endif
803 .admit_task = cedf_admit_task,
804 .activate_plugin = cedf_activate_plugin,
643}; 805};
644 806
645static void cedf_domain_init(int first_cpu, int last_cpu)
646{
647 int cpu;
648
649 /* Create new domain for this cluster. */
650 cedf_domain_t *new_cedf_domain = kmalloc(sizeof(*new_cedf_domain),
651 GFP_KERNEL);
652
653 /* Initialize cluster domain. */
654 edf_domain_init(&new_cedf_domain->domain, NULL,
655 cedf_release_jobs);
656 new_cedf_domain->first_cpu = first_cpu;
657 new_cedf_domain->last_cpu = last_cpu;
658 INIT_LIST_HEAD(&new_cedf_domain->cedf_cpu_queue);
659
660 /* Assign all cpus in cluster to point to this domain. */
661 for (cpu = first_cpu; cpu <= last_cpu; cpu++) {
662 remote_cedf(cpu) = new_cedf_domain;
663 cedf_domains_array[cpu] = new_cedf_domain;
664 }
665}
666 807
667static int __init init_cedf(void) 808static int __init init_cedf(void)
668{ 809{
669 int cpu; 810 int cpu;
670 cpu_entry_t *entry; 811 cpu_entry_t *entry;
671 812
672 /* num_online_cpus() should have been set already 813 bheap_init(&cedf_cpu_heap);
673 * if the number of available cpus is less then the cluster
674 * size (currently 4) then it is pointless trying to use
675 * CEDF, so we disable this plugin
676 */
677 if(num_online_cpus() < cluster_size) {
678 printk(KERN_INFO "Not registering C-EDF plugin: "
679 "Num Online Cpus (%d) < Min Cluster Size (%d)\n",
680 num_online_cpus(), cluster_size);
681 do_cleanup = 0;
682 return 0;
683 }
684
685 /*
686 * initialize short_cut for per-cpu cedf state;
687 * there may be a problem here if someone removes a cpu
688 * while we are doing this initialization... and if cpus
689 * are added / removed later... is it a _real_ problem for cedf?
690 */
691 cedf_cpu_entries_array = kmalloc(
692 sizeof(cpu_entry_t *) * num_online_cpus(),
693 GFP_KERNEL);
694
695 cedf_domains_array = kmalloc(
696 sizeof(cedf_domain_t *) * num_online_cpus(),
697 GFP_KERNEL);
698
699 /* initialize CPU state */ 814 /* initialize CPU state */
700 for (cpu = 0; cpu < num_online_cpus(); cpu++) { 815 for (cpu = 0; cpu < NR_CPUS; cpu++) {
701 entry = &per_cpu(cedf_cpu_entries, cpu); 816 entry = &per_cpu(cedf_cpu_entries, cpu);
702 cedf_cpu_entries_array[cpu] = entry; 817 cedf_cpus[cpu] = entry;
703 atomic_set(&entry->will_schedule, 0); 818 atomic_set(&entry->will_schedule, 0);
704 entry->linked = NULL;
705 entry->scheduled = NULL;
706 entry->cpu = cpu; 819 entry->cpu = cpu;
707 INIT_LIST_HEAD(&entry->list); 820 entry->hn = &cedf_heap_node[cpu];
821 bheap_node_init(&entry->hn, entry);
708 } 822 }
709 823 edf_domain_init(&cedf, NULL, cedf_release_jobs);
710 /* initialize all cluster domains */
711 for (cpu = 0; cpu < num_online_cpus(); cpu += cluster_size)
712 cedf_domain_init(cpu, cpu+cluster_size-1);
713
714 return register_sched_plugin(&cedf_plugin); 824 return register_sched_plugin(&cedf_plugin);
715} 825}
716 826
717static void clean_cedf(void)
718{
719 if(do_cleanup) {
720 kfree(cedf_cpu_entries_array);
721 kfree(cedf_domains_array);
722 }
723}
724 827
725module_init(init_cedf); 828module_init(init_cedf);
726module_exit(clean_cedf);
727
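
For illustration, here is a minimal userspace sketch of the lowest-priority-CPU selection that this patch switches to: the old priority-sorted cedf_cpu_queue list is replaced by a bheap whose comparator (cpu_lower_prio) is inverted, so the CPU with the lowest-priority linked job sits at the top of the heap and is picked first by check_for_preemptions(). The cpu_entry layout, the deadline values, and the linear scan standing in for bheap_peek() below are assumptions made for this sketch, not LITMUS code.

/* Sketch only: models "lowest-priority CPU at the top" with plain EDF
 * deadlines.  In the kernel this uses bheap_insert()/bheap_peek() and
 * edf_higher_prio(); here a linear scan stands in for the heap.
 */
#include <stdio.h>

#define NUM_CPUS 4

struct cpu_entry {
	int cpu;
	long long linked_deadline;	/* deadline of the linked job; -1 = idle */
};

/* Returns nonzero if a has lower priority than b.  An idle CPU is the
 * lowest priority of all; otherwise the later deadline loses (EDF).
 * This mirrors the inverted edf_higher_prio(b->linked, a->linked) call
 * in cpu_lower_prio() above.
 */
static int lower_prio(const struct cpu_entry *a, const struct cpu_entry *b)
{
	if (a->linked_deadline < 0)
		return 1;
	if (b->linked_deadline < 0)
		return 0;
	return a->linked_deadline > b->linked_deadline;
}

/* Stand-in for lowest_prio_cpu(): O(n) here, an O(1) peek on the kernel heap. */
static struct cpu_entry *lowest_prio_cpu(struct cpu_entry *e, int n)
{
	struct cpu_entry *last = &e[0];
	int i;

	for (i = 1; i < n; i++)
		if (lower_prio(&e[i], last))
			last = &e[i];
	return last;
}

int main(void)
{
	struct cpu_entry cpus[NUM_CPUS] = {
		{ 0, 100 }, { 1, -1 }, { 2, 40 }, { 3, 250 }
	};
	long long new_job_deadline = 80;
	struct cpu_entry *last = lowest_prio_cpu(cpus, NUM_CPUS);

	printf("preemption candidate: CPU %d\n", last->cpu);

	/* check_for_preemptions(): link the new job if it beats the job
	 * currently linked to the lowest-priority CPU. */
	if (last->linked_deadline < 0 || new_job_deadline < last->linked_deadline) {
		last->linked_deadline = new_job_deadline;
		printf("job with deadline %lld linked to CPU %d\n",
		       new_job_deadline, last->cpu);
	}
	return 0;
}

With the heap, update_cpu_position() becomes a delete-plus-insert in O(log n) whenever a CPU's linked task changes, which is why the patch drops the old list_for_each() insertion loop.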