author    Bjoern B. Brandenburg <bbb@cs.unc.edu>  2007-10-04 15:28:41 -0400
committer Bjoern B. Brandenburg <bbb@cs.unc.edu>  2007-10-04 15:28:41 -0400
commit    4be7a143bd5a08e8b8cee8539dea745a6107cfd3 (patch)
tree      fcd1bf22fa8a30fe6b7c8f44ff75fe674438f7de /kernel
parent    d74881ffa37434d2ce5455e9e2086292c6128d56 (diff)
Add adaptive scheduler based on GSN-EDF.
This only introduces the necessary source files.
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile         |   2
-rw-r--r--  kernel/litmus.c         |   3
-rw-r--r--  kernel/sched_adaptive.c | 812
3 files changed, 815 insertions, 2 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 1b6957b160..55acc937b5 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -13,7 +13,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
 	edf_common.o fifo_common.o pfair_common.o\
 	sched_global_edf.o sched_part_edf.o sched_edf_hsb.o sched_pfair.o \
 	sched_gsn_edf.o sched_psn_edf.o litmus_sem.o \
-	trace.o ft_event.o rt_domain.o
+	trace.o ft_event.o rt_domain.o sched_adaptive.o
 
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
 obj-y += time/
diff --git a/kernel/litmus.c b/kernel/litmus.c
index 1e4db2cd20..217b0a0357 100644
--- a/kernel/litmus.c
+++ b/kernel/litmus.c
@@ -592,6 +592,7 @@ sched_plugin_t *init_edf_hsb_plugin(void);
 sched_plugin_t *init_pfair_plugin(void);
 sched_plugin_t *init_gsn_edf_plugin(void);
 sched_plugin_t *init_psn_edf_plugin(void);
+sched_plugin_t *init_adaptive_plugin(void);
 
 /* keep everything needed to setup plugins in one place */
 
@@ -616,7 +617,7 @@ static struct {
 	PLUGIN(PFAIR, pfair),
 	PLUGIN(GSN_EDF, gsn_edf),
 	PLUGIN(PSN_EDF, psn_edf),
-
+	PLUGIN(ADAPTIVE, adaptive),
 	/*********************************************
 	 * Add your custom plugin here
 	 **********************************************/
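
The two hunks above register the new plugin: the forward declaration makes
init_adaptive_plugin() visible, and the PLUGIN() table entry pairs the plugin
identifier with that initializer. The macro body itself is outside this diff;
a plausible sketch of what an entry expands to (the field names here are
illustrative assumptions, not taken from litmus.c):

	/* hypothetical expansion of PLUGIN(ADAPTIVE, adaptive): */
	{ .name = "ADAPTIVE", .init = init_adaptive_plugin }

Either way, the visible effect is that init_adaptive_plugin(), defined at the
bottom of kernel/sched_adaptive.c below, becomes reachable from the plugin
setup table.
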
diff --git a/kernel/sched_adaptive.c b/kernel/sched_adaptive.c
new file mode 100644
index 0000000000..14a8db7638
--- /dev/null
+++ b/kernel/sched_adaptive.c
@@ -0,0 +1,812 @@
/*
 * kernel/sched_adaptive.c
 *
 * Implementation of Aaron's adaptive global EDF scheduling algorithm.
 *
 * This scheduler is based on the GSN-EDF scheduler for the sake of
 * simplicity.
 */

#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/list.h>

#include <linux/queuelock.h>
#include <linux/litmus.h>
#include <linux/sched_plugin.h>
#include <linux/edf_common.h>
#include <linux/sched_trace.h>

/* Overview of GSN-EDF operations.
 *
 * For a detailed explanation of GSN-EDF have a look at the FMLP paper. This
 * description only covers how the individual operations are implemented in
 * LITMUS.
 *
 * link_task_to_cpu(T, cpu)	- Low-level operation to update the linkage
 *				  structure (NOT the actually scheduled
 *				  task). If there is another linked task To
 *				  already it will set To->linked_on = NO_CPU
 *				  (thereby removing its association with this
 *				  CPU). However, it will not requeue the
 *				  previously linked task (if any). It will set
 *				  T's state to RT_F_RUNNING and check whether
 *				  it is already running somewhere else. If T
 *				  is scheduled somewhere else it will link
 *				  it to that CPU instead (and pull the linked
 *				  task to cpu). T may be NULL.
 *
 * unlink(T)			- Unlink removes T from all scheduler data
 *				  structures. If it is linked to some CPU it
 *				  will link NULL to that CPU. If it is
 *				  currently queued in the gsnedf queue it will
 *				  be removed from the T->rt_list. It is safe to
 *				  call unlink(T) if T is not linked. T may not
 *				  be NULL.
 *
 * requeue(T)			- Requeue will insert T into the appropriate
 *				  queue. If the system is in real-time mode and
 *				  T has already been released, it will go into
 *				  the ready queue. If the system is not in
 *				  real-time mode, T will go into the release
 *				  queue. If T's release time is in the future,
 *				  it will go into the release queue. That means
 *				  that T's release time/job no/etc. has to be
 *				  updated before requeue(T) is called. It is
 *				  not safe to call requeue(T) when T is already
 *				  queued. T may not be NULL.
 *
 * gsnedf_job_arrival(T)	- This is the catch-all function when T enters
 *				  the system after either a suspension or at a
 *				  job release. It will queue T (which means it
 *				  is not safe to call gsnedf_job_arrival(T) if
 *				  T is already queued) and then check whether a
 *				  preemption is necessary. If a preemption is
 *				  necessary it will update the linkage
 *				  accordingly and cause schedule() to be called
 *				  (either with an IPI or need_resched). It is
 *				  safe to call gsnedf_job_arrival(T) if T's
 *				  next job has not been actually released yet
 *				  (release time in the future). T will be put
 *				  on the release queue in that case.
 *
 * job_completion(T)		- Take care of everything that needs to be done
 *				  to prepare T for its next release and place
 *				  it in the right queue with
 *				  gsnedf_job_arrival().
 *
 *
 * When we know that T is linked to CPU then link_task_to_cpu(NULL, CPU) is
 * equivalent to unlink(T). Note that if you unlink a task from a CPU none of
 * the functions will automatically propagate pending tasks from the ready
 * queue to a linked task. This is the job of the calling function (by means
 * of __take_ready).
 */
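
/* A minimal sketch of the caller-side pattern implied above, assuming
 * adaptive_lock is held (the identifiers match the helpers defined below):
 *
 *	unlink(t);
 *	if (!entry->linked)
 *		link_task_to_cpu(__take_ready(&adaptive), entry);
 *	if (entry->linked != entry->scheduled)
 *		preempt(entry);
 *
 * Nothing pulls a pending job out of the ready queue automatically; the
 * caller has to do it via __take_ready(), as adaptive_schedule() does.
 */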


/* cpu_entry_t - maintain the linked and scheduled state
 */
typedef struct {
	int			cpu;
	struct task_struct*	linked;		/* only RT tasks */
	struct task_struct*	scheduled;	/* only RT tasks */
	struct list_head	list;
	atomic_t		will_schedule;	/* prevent unneeded IPIs */
} cpu_entry_t;
DEFINE_PER_CPU(cpu_entry_t, adaptive_cpu_entries);

#define set_will_schedule() \
	(atomic_set(&__get_cpu_var(adaptive_cpu_entries).will_schedule, 1))
#define clear_will_schedule() \
	(atomic_set(&__get_cpu_var(adaptive_cpu_entries).will_schedule, 0))
#define test_will_schedule(cpu) \
	(atomic_read(&per_cpu(adaptive_cpu_entries, cpu).will_schedule))


#define NO_CPU 0xffffffff

/* The adaptive_lock is used to serialize all scheduling events. It protects
 * the CPU queue, the per-CPU linkage state, and the adaptive rt domain.
 */
static queuelock_t adaptive_lock;
/* the cpus queue themselves according to priority in here */
static LIST_HEAD(adaptive_cpu_queue);

static rt_domain_t adaptive;


/* update_cpu_position - Move the cpu entry to the correct place to maintain
 *                       order in the cpu queue. Caller must hold
 *                       adaptive_lock.
 */
static void update_cpu_position(cpu_entry_t *entry)
{
	cpu_entry_t *other;
	struct list_head *pos;
	list_del(&entry->list);
	/* if we do not execute real-time jobs we just move
	 * to the end of the queue
	 */
	if (entry->linked) {
		list_for_each(pos, &adaptive_cpu_queue) {
			other = list_entry(pos, cpu_entry_t, list);
			if (edf_higher_prio(entry->linked, other->linked)) {
				__list_add(&entry->list, pos->prev, pos);
				return;
			}
		}
	}
	/* if we get this far we have the lowest priority job */
	list_add_tail(&entry->list, &adaptive_cpu_queue);
}
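
/* Resulting invariant (assuming every entry is repositioned through
 * update_cpu_position): adaptive_cpu_queue is sorted by the priority of each
 * CPU's linked job, highest first, with CPUs that have no linked job at the
 * end. A preemption check therefore only ever needs to inspect the tail, as
 * adaptive_job_arrival() below does:
 *
 *	last = list_entry(adaptive_cpu_queue.prev, cpu_entry_t, list);
 *	if (edf_preemption_needed(&adaptive, last->linked))
 *		... preempt the tail CPU ...
 */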

/* link_task_to_cpu - Update the link of a CPU.
 *                    Handles the case where the to-be-linked task is already
 *                    scheduled on a different CPU.
 */
static noinline void link_task_to_cpu(struct task_struct* linked,
				      cpu_entry_t *entry)
{
	cpu_entry_t *sched;
	struct task_struct* tmp;
	int on_cpu;

	BUG_ON(linked && !is_realtime(linked));

	/* Currently linked task is set to be unlinked. */
	if (entry->linked) {
		entry->linked->rt_param.linked_on = NO_CPU;
	}

	/* Link new task to CPU. */
	if (linked) {
		set_rt_flags(linked, RT_F_RUNNING);
		/* handle the case that the task is already scheduled
		 * somewhere else
		 */
		on_cpu = linked->rt_param.scheduled_on;
		if (on_cpu != NO_CPU) {
			sched = &per_cpu(adaptive_cpu_entries, on_cpu);
			/* this should only happen if not linked already */
			BUG_ON(sched->linked == linked);

			/* If we are already scheduled on the CPU to which we
			 * wanted to link, we don't need to do the swap --
			 * we just link ourselves to the CPU and depend on
			 * the caller to get things right.
			 */
			if (entry != sched) {
				tmp = sched->linked;
				linked->rt_param.linked_on = sched->cpu;
				sched->linked = linked;
				update_cpu_position(sched);
				linked = tmp;
			}
		}
		if (linked) /* might be NULL due to swap */
			linked->rt_param.linked_on = entry->cpu;
	}
	entry->linked = linked;
	update_cpu_position(entry);
}
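
/* Worked example of the swap (hypothetical tasks T0, T1, T2 on two CPUs):
 * T1 is currently scheduled on CPU 0 while CPU 1 requests
 * link_task_to_cpu(T1, entry of CPU 1). Because T1 is still running on
 * CPU 0, the links are swapped rather than migrating a running job:
 *
 *	before:	cpu0 = { scheduled: T1, linked: T0 }
 *		cpu1 = { linked: T2 }
 *	after:	cpu0 = { scheduled: T1, linked: T1 }
 *		cpu1 = { linked: T0 }
 *
 * T2 was unlinked at the top of the function (linked_on = NO_CPU); putting
 * it back into a queue is the caller's responsibility, typically via
 * requeue().
 */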

/* unlink - Make sure a task is not linked any longer to an entry
 *          where it was linked before. Must hold adaptive_lock.
 */
static noinline void unlink(struct task_struct* t)
{
	cpu_entry_t *entry;

	if (unlikely(!t)) {
		TRACE_BUG_ON(!t);
		return;
	}

	if (t->rt_param.linked_on != NO_CPU) {
		/* unlink */
		entry = &per_cpu(adaptive_cpu_entries, t->rt_param.linked_on);
		t->rt_param.linked_on = NO_CPU;
		link_task_to_cpu(NULL, entry);
	} else if (in_list(&t->rt_list)) {
		/* This is an interesting situation: t is scheduled,
		 * but was just recently unlinked. It cannot be
		 * linked anywhere else (because then it would have
		 * been relinked to this CPU), thus it must be in some
		 * queue. We must remove it from the list in this
		 * case.
		 */
		list_del(&t->rt_list);
	}
}


/* preempt - force a CPU to reschedule
 */
static noinline void preempt(cpu_entry_t *entry)
{
	/* We cannot make the is_np() decision here if it is a remote CPU
	 * because requesting exit_np() requires that we currently use the
	 * address space of the task. Thus, in the remote case we just send
	 * the IPI and let schedule() handle the problem.
	 */

	if (smp_processor_id() == entry->cpu) {
		if (entry->scheduled && is_np(entry->scheduled))
			request_exit_np(entry->scheduled);
		else
			set_tsk_need_resched(current);
	} else
		/* in case that it is a remote CPU we have to defer the
		 * decision to the remote CPU
		 * FIXME: We could save a few IPIs here if we leave the flag
		 * set when we are waiting for a np_exit().
		 */
		if (!test_will_schedule(entry->cpu))
			smp_send_reschedule(entry->cpu);
}
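
/* Sketch of the IPI-avoidance handshake implied by will_schedule (the flag
 * is set in adaptive_scheduler_tick() and cleared in adaptive_schedule(),
 * both below):
 *
 *	CPU 0: set_will_schedule();        (tick decided FORCE_RESCHED)
 *	CPU 1: preempt(entry of CPU 0);
 *	       test_will_schedule(0) == 1  =>  no smp_send_reschedule(0)
 *	CPU 0: adaptive_schedule() runs and calls clear_will_schedule()
 */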

/* requeue - Put an unlinked task into the adaptive domain.
 *           Caller must hold adaptive_lock.
 */
static noinline void requeue(struct task_struct* task)
{
	BUG_ON(!task);
	/* sanity check rt_list before insertion */
	BUG_ON(in_list(&task->rt_list));

	if (get_rt_flags(task) == RT_F_SLEEP ||
	    get_rt_mode() != MODE_RT_RUN) {
		/* this task has expired; _schedule has already taken care of
		 * updating the release and the deadline. We just need to
		 * check whether it has been released yet.
		 */
		if (is_released(task) && get_rt_mode() == MODE_RT_RUN)
			__add_ready(&adaptive, task);
		else {
			/* it has got to wait */
			__add_release(&adaptive, task);
		}

	} else
		/* this is a forced preemption,
		 * thus the task stays in the ready queue;
		 * we only need to make it available to other CPUs
		 */
		__add_ready(&adaptive, task);
}

/* adaptive_job_arrival: task is either resumed or released */
static noinline void adaptive_job_arrival(struct task_struct* task)
{
	cpu_entry_t* last;

	BUG_ON(list_empty(&adaptive_cpu_queue));
	BUG_ON(!task);

	/* first queue arriving job */
	requeue(task);

	/* then check for any necessary preemptions */
	last = list_entry(adaptive_cpu_queue.prev, cpu_entry_t, list);
	if (edf_preemption_needed(&adaptive, last->linked)) {
		/* preemption necessary */
		task = __take_ready(&adaptive);
		TRACE("job_arrival: task %d linked to %d\n",
		      task->pid, last->cpu);
		if (last->linked)
			requeue(last->linked);

		link_task_to_cpu(task, last);
		preempt(last);
	}
}

/* check for current job releases */
static noinline void adaptive_release_jobs(void)
{
	struct list_head *pos, *save;
	struct task_struct *queued;

	list_for_each_safe(pos, save, &adaptive.release_queue) {
		queued = list_entry(pos, struct task_struct, rt_list);
		if (likely(is_released(queued))) {
			/* this one is ready to go */
			list_del(pos);
			set_rt_flags(queued, RT_F_RUNNING);

			sched_trace_job_release(queued);
			adaptive_job_arrival(queued);
		}
		else
			/* the release queue is ordered */
			break;
	}
}

/* adaptive_scheduler_tick - this function is called for every local timer
 *                           interrupt.
 *
 * Checks whether the current task has expired and, if it has not,
 * whether we need to preempt it.
 */
static reschedule_check_t adaptive_scheduler_tick(void)
{
	unsigned long flags;
	struct task_struct* t = current;
	reschedule_check_t want_resched = NO_RESCHED;

	/* expire tasks even if not in real-time mode
	 * this makes sure that at the end of real-time mode
	 * no task "runs away forever".
	 */
	if (is_realtime(t))
		TRACE_CUR("before dec: time_slice == %u\n", t->time_slice);

	if (is_realtime(t) && t->time_slice && !--t->time_slice) {
		if (!is_np(t)) { /* np tasks will be preempted when they become
				    preemptable again */
			want_resched = FORCE_RESCHED;
			set_will_schedule();
			TRACE("adaptive_scheduler_tick: "
			      "%d is preemptable "
			      " => FORCE_RESCHED\n", t->pid);
		} else {
			TRACE("adaptive_scheduler_tick: "
			      "%d is non-preemptable, "
			      "preemption delayed.\n", t->pid);
			request_exit_np(t);
		}
	}

	/* only the first CPU needs to release jobs */
	if (get_rt_mode() == MODE_RT_RUN && smp_processor_id() == 0) {
		queue_lock_irqsave(&adaptive_lock, flags);

		/* (1) try to release pending jobs */
		adaptive_release_jobs();

		/* we don't need to check linked != scheduled since
		 * set_tsk_need_resched has been set by preempt() if necessary
		 */

		queue_unlock_irqrestore(&adaptive_lock, flags);
	}

	return want_resched;
}

/* caller holds adaptive_lock */
static noinline void job_completion(struct task_struct *t)
{
	BUG_ON(!t);

	sched_trace_job_completion(t);

	TRACE_TASK(t, "job_completion().\n");

	/* set flags */
	set_rt_flags(t, RT_F_SLEEP);
	/* prepare for next period */
	edf_prepare_for_next_period(t);
	/* unlink */
	unlink(t);
	/* requeue
	 * But don't requeue a blocking task. */
	if (is_running(t))
		adaptive_job_arrival(t);
}


/* Getting schedule() right is a bit tricky. schedule() may not make any
 * assumptions on the state of the current task since it may be called for a
 * number of reasons. The reasons include a scheduler_tick() determining that
 * it was necessary, because sys_exit_np() was called, because some Linux
 * subsystem determined so, or even (in the worst case) because there is a bug
 * hidden somewhere. Thus, we must take extreme care to determine what the
 * current state is.
 *
 * The CPU could currently be scheduling a task (or not) and be linked (or
 * not).
 *
 * The following assertions for the scheduled task could hold:
 *
 *	- !is_running(scheduled)	// the job blocks
 *	- scheduled->timeslice == 0	// the job completed (forcefully)
 *	- get_rt_flag() == RT_F_SLEEP	// the job completed (by syscall)
 *	- linked != scheduled		// we need to reschedule (for any reason)
 *	- is_np(scheduled)		// rescheduling must be delayed,
 *					   sys_exit_np must be requested
 *
 * Any of these can occur together.
 */
static int adaptive_schedule(struct task_struct * prev,
			     struct task_struct ** next,
			     runqueue_t * rq)
{
	cpu_entry_t* entry = &__get_cpu_var(adaptive_cpu_entries);
	int out_of_time, sleep, preempt, np, exists,
	    rt, blocks;
	struct task_struct* linked;

	/* Will be released in finish_switch. */
	queue_lock(&adaptive_lock);
	clear_will_schedule();

	/* sanity checking */
	BUG_ON(entry->scheduled && entry->scheduled != prev);
	BUG_ON(entry->scheduled && !is_realtime(prev));

	/* (0) Determine state */
	exists      = entry->scheduled != NULL;
	blocks      = exists && !is_running(entry->scheduled);
	out_of_time = exists && !entry->scheduled->time_slice;
	np          = exists && is_np(entry->scheduled);
	sleep       = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP;
	preempt     = entry->scheduled != entry->linked;
	rt          = get_rt_mode() == MODE_RT_RUN;

	/* If a task blocks we have no choice but to reschedule.
	 */
	if (blocks)
		unlink(entry->scheduled);

	/* Request a sys_exit_np() call if we would like to preempt but cannot.
	 * We need to make sure to update the link structure anyway in case
	 * that we are still linked. Multiple calls to request_exit_np() don't
	 * hurt.
	 */
	if (np && (out_of_time || preempt || sleep)) {
		unlink(entry->scheduled);
		request_exit_np(entry->scheduled);
	}

	/* Any task that is preemptable and either exhausts its execution
	 * budget or wants to sleep completes. We may have to reschedule after
	 * this.
	 */
	if (!np && (out_of_time || sleep))
		job_completion(entry->scheduled);

	/* Stop real-time tasks when we leave real-time mode
	 */
	if (!rt && entry->linked) {
		/* task will be preempted once it is preemptable
		 * (which it may be already)
		 */
		linked = entry->linked;
		unlink(linked);
		requeue(linked);
	}

	/* Link pending task if we became unlinked.
	 */
	if (rt && !entry->linked)
		link_task_to_cpu(__take_ready(&adaptive), entry);

	/* The final scheduling decision. Do we need to switch for some reason?
	 * If linked is different from scheduled, select linked as next.
	 */
	if ((!np || blocks) &&
	    entry->linked != entry->scheduled) {
		/* Take care of a previously scheduled
		 * job by taking it out of the Linux runqueue.
		 */
		if (entry->scheduled) {
			if (prev->array)
				/* take it out of the run queue */
				deactivate_task(prev, rq);
		}

		/* Schedule a linked job? */
		if (entry->linked) {
			*next = entry->linked;
			/* mark the task as executing on this cpu */
			set_task_cpu(*next, smp_processor_id());
			/* stick the task into the runqueue */
			__activate_task(*next, rq);
		}
	} else
		/* Only override the Linux scheduler if we have a real-time
		 * task scheduled that needs to continue.
		 */
		if (exists)
			*next = prev;

	/* Unlock in case we don't affect real-time tasks or
	 * if nothing changed and finish_switch won't be called.
	 */
	if (prev == *next || (!is_realtime(prev) && !*next))
		queue_unlock(&adaptive_lock);

	return 0;
}
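
/* Compact summary of the decision logic above (derived from the code; each
 * row lists the dominant condition and its effect):
 *
 *	blocks				-> unlink; a pending job may be linked
 *	np && (out_of_time ||
 *	       preempt || sleep)	-> unlink + request_exit_np(); the np
 *					   job keeps running for now
 *	!np && (out_of_time || sleep)	-> job_completion()
 *	linked != scheduled (&& !np)	-> deactivate prev, activate linked
 */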


/* _finish_switch - we just finished the switch away from prev
 */
static void adaptive_finish_switch(struct task_struct *prev)
{
	cpu_entry_t* entry = &__get_cpu_var(adaptive_cpu_entries);

	if (is_realtime(current))
		entry->scheduled = current;
	else
		entry->scheduled = NULL;

	prev->rt_param.scheduled_on = NO_CPU;
	current->rt_param.scheduled_on = smp_processor_id();

	/* unlock in case schedule() left it locked */
	if (is_realtime(current) || is_realtime(prev))
		queue_unlock(&adaptive_lock);
}
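
/* The lock handoff between the two functions above, as a sketch (assuming a
 * real-time task is involved in the switch):
 *
 *	adaptive_schedule():		queue_lock(&adaptive_lock);
 *					... pick *next ...
 *					unlocks only if prev == *next or no
 *					real-time task is involved
 *	(context switch happens)
 *	adaptive_finish_switch():	queue_unlock(&adaptive_lock);
 *
 * The lock is thus held across the context switch whenever real-time state
 * changes, which keeps linked/scheduled updates atomic with the switch.
 */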


/* Prepare a task for running in RT mode
 * Enqueues the task into the master queue data structure
 * returns
 *	-EPERM if task is not TASK_STOPPED
 */
static long adaptive_prepare_task(struct task_struct * t)
{
	unsigned long flags;
	TRACE("adaptive: prepare task %d\n", t->pid);

	if (t->state == TASK_STOPPED) {
		__setscheduler(t, SCHED_FIFO, MAX_RT_PRIO - 1);

		t->rt_param.scheduled_on = NO_CPU;
		t->rt_param.linked_on    = NO_CPU;
		if (get_rt_mode() == MODE_RT_RUN)
			/* The action is already on.
			 * Prepare immediate release
			 */
			edf_release_now(t);
		/* The task should be running in the queue, otherwise signal
		 * code will try to wake it up with fatal consequences.
		 */
		t->state = TASK_RUNNING;

		queue_lock_irqsave(&adaptive_lock, flags);
		requeue(t);
		queue_unlock_irqrestore(&adaptive_lock, flags);
		return 0;
	}
	else
		return -EPERM;
}

static void adaptive_wake_up_task(struct task_struct *task)
{
	unsigned long flags;
	/* We must determine whether the task should go into the release
	 * queue or into the ready queue. It may enter the ready queue
	 * if it has credit left in its time slice and has not yet reached
	 * its deadline. If it is now past its deadline we assume this is the
	 * arrival of a new sporadic job and thus put it in the ready queue
	 * anyway. If it has zero budget and the next release is in the future
	 * it has to go to the release queue.
	 */
	TRACE("adaptive: %d unsuspends with budget=%d\n",
	      task->pid, task->time_slice);
	task->state = TASK_RUNNING;

	/* We need to take suspensions because of semaphores into
	 * account! If a job resumes after being suspended due to acquiring
	 * a semaphore, it should never be treated as a new job release.
	 */
	if (get_rt_flags(task) == RT_F_EXIT_SEM) {
		set_rt_flags(task, RT_F_RUNNING);
	} else {
		if (is_tardy(task)) {
			/* new sporadic release */
			edf_release_now(task);
			sched_trace_job_release(task);
		}
		else if (task->time_slice)
			/* came back in time before deadline
			 */
			set_rt_flags(task, RT_F_RUNNING);
	}

	queue_lock_irqsave(&adaptive_lock, flags);
	adaptive_job_arrival(task);
	queue_unlock_irqrestore(&adaptive_lock, flags);
}
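
/* The resume policy above, as a table (rows checked top to bottom, derived
 * from the branches in this function):
 *
 *	RT_F_EXIT_SEM		-> semaphore suspension ended: same job
 *				   continues (RT_F_RUNNING)
 *	is_tardy(task)		-> treated as a new sporadic release
 *				   (edf_release_now)
 *	task->time_slice > 0	-> budget left, deadline not reached: job
 *				   continues (RT_F_RUNNING)
 *	otherwise		-> flags stay unchanged; adaptive_job_arrival()
 *				   sends the task to the release queue
 */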

static void adaptive_task_blocks(struct task_struct *t)
{
	unsigned long flags;

	/* unlink if necessary */
	queue_lock_irqsave(&adaptive_lock, flags);
	unlink(t);
	queue_unlock_irqrestore(&adaptive_lock, flags);

	BUG_ON(!is_realtime(t));
	TRACE("task %d suspends with budget=%d\n", t->pid, t->time_slice);
	BUG_ON(t->rt_list.next != LIST_POISON1);
	BUG_ON(t->rt_list.prev != LIST_POISON2);
}


/* When _tear_down is called, the task should not be in any queue any more
 * as it must have blocked first. We don't have any internal state for the
 * task, it is all in the task_struct.
 */
static long adaptive_tear_down(struct task_struct * t)
{
	BUG_ON(!is_realtime(t));
	TRACE_TASK(t, "RIP\n");
	BUG_ON(t->array);
	BUG_ON(t->rt_list.next != LIST_POISON1);
	BUG_ON(t->rt_list.prev != LIST_POISON2);
	return 0;
}

static long adaptive_pi_block(struct pi_semaphore *sem,
			      struct task_struct *new_waiter)
{
	/* This callback has to handle the situation where a new waiter is
	 * added to the wait queue of the semaphore.
	 *
	 * We must check if it has a higher priority than the currently
	 * highest-priority task, and then potentially reschedule.
	 */

	BUG_ON(!new_waiter);

	if (edf_higher_prio(new_waiter, sem->hp.task)) {
		TRACE_TASK(new_waiter, " boosts priority\n");
		/* called with IRQs disabled */
		queue_lock(&adaptive_lock);
		/* store new highest-priority task */
		sem->hp.task = new_waiter;
		if (sem->holder) {
			/* let holder inherit */
			sem->holder->rt_param.inh_task = new_waiter;
			unlink(sem->holder);
			adaptive_job_arrival(sem->holder);
		}
		queue_unlock(&adaptive_lock);
	}

	return 0;
}

static long adaptive_inherit_priority(struct pi_semaphore *sem,
				      struct task_struct *new_owner)
{
	/* We don't need to acquire the adaptive_lock since at the time of this
	 * call new_owner isn't actually scheduled yet (it's still sleeping)
	 * and since the calling function already holds sem->wait.lock, which
	 * prevents concurrent sem->hp.task changes.
	 */

	if (sem->hp.task && sem->hp.task != new_owner) {
		new_owner->rt_param.inh_task = sem->hp.task;
		TRACE_TASK(new_owner, "inherited priority from %s/%d\n",
			   sem->hp.task->comm, sem->hp.task->pid);
	} else
		TRACE_TASK(new_owner,
			   "cannot inherit priority, "
			   "no higher priority job waits.\n");
	return 0;
}

/* This function is called on a semaphore release, and assumes that
 * the current task is also the semaphore holder.
 */
static long adaptive_return_priority(struct pi_semaphore *sem)
{
	struct task_struct* t = current;
	int ret = 0;

	/* Find new highest-priority semaphore task
	 * if holder task is the current hp.task.
	 *
	 * Calling function holds sem->wait.lock.
	 */
	if (t == sem->hp.task)
		edf_set_hp_task(sem);

	TRACE_CUR("adaptive_return_priority for lock %p\n", sem);

	if (t->rt_param.inh_task) {
		/* interrupts already disabled by PI code */
		queue_lock(&adaptive_lock);

		/* Reset inh_task to NULL. */
		t->rt_param.inh_task = NULL;

		/* Check if rescheduling is necessary */
		unlink(t);
		adaptive_job_arrival(t);
		queue_unlock(&adaptive_lock);
	}

	return ret;
}
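
/* Taken together, the three callbacks above implement the priority
 * inheritance handshake for semaphores. A sketch of one contended
 * acquisition, with hypothetical tasks H (high priority) and L (low
 * priority), where L holds the semaphore:
 *
 *	H blocks on sem	-> adaptive_pi_block(): sem->hp.task = H and
 *			   L->rt_param.inh_task = H; L is relinked at
 *			   H's priority
 *	L releases sem	-> adaptive_return_priority(): L drops inh_task
 *			   and is relinked at its own priority
 *	H acquires sem	-> adaptive_inherit_priority(): H inherits nothing,
 *			   since it is sem->hp.task itself
 */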

static int adaptive_mode_change(int new_mode)
{
	unsigned long flags;
	int cpu;
	cpu_entry_t *entry;

	if (new_mode == MODE_RT_RUN) {
		queue_lock_irqsave(&adaptive_lock, flags);

		__rerelease_all(&adaptive, edf_release_at);

		/* get old cruft out of the way in case we reenter real-time
		 * mode for a second time
		 */
		while (!list_empty(&adaptive_cpu_queue))
			list_del(adaptive_cpu_queue.next);
		/* reinitialize */
		for_each_online_cpu(cpu) {
			entry = &per_cpu(adaptive_cpu_entries, cpu);
			atomic_set(&entry->will_schedule, 0);
			entry->linked    = NULL;
			entry->scheduled = NULL;
			list_add(&entry->list, &adaptive_cpu_queue);
		}

		queue_unlock_irqrestore(&adaptive_lock, flags);

	}
	return 0;
}


/* Plugin object */
static sched_plugin_t s_plugin __cacheline_aligned_in_smp = {
	.ready_to_use = 0
};


/*
 * Plugin initialization code.
 */
#define INIT_SCHED_PLUGIN (struct sched_plugin){ \
	.plugin_name		= "ADAPTIVE", \
	.ready_to_use		= 1, \
	.algo_scheduler_tick	= adaptive_scheduler_tick, \
	.scheduler_tick		= rt_scheduler_tick, \
	.prepare_task		= adaptive_prepare_task, \
	.sleep_next_period	= edf_sleep_next_period, \
	.tear_down		= adaptive_tear_down, \
	.schedule		= adaptive_schedule, \
	.finish_switch		= adaptive_finish_switch, \
	.mode_change		= adaptive_mode_change, \
	.wake_up_task		= adaptive_wake_up_task, \
	.task_blocks		= adaptive_task_blocks, \
	.inherit_priority	= adaptive_inherit_priority, \
	.return_priority	= adaptive_return_priority, \
	.pi_block		= adaptive_pi_block \
}


sched_plugin_t *__init init_adaptive_plugin(void)
{
	int cpu;
	cpu_entry_t *entry;

	if (!s_plugin.ready_to_use)
	{
		/* initialize CPU state */
		for (cpu = 0; cpu < NR_CPUS; cpu++) {
			entry = &per_cpu(adaptive_cpu_entries, cpu);
			atomic_set(&entry->will_schedule, 0);
			entry->linked    = NULL;
			entry->scheduled = NULL;
			entry->cpu       = cpu;
		}

		queue_lock_init(&adaptive_lock);
		set_sched_options(SCHED_NONE);
		edf_domain_init(&adaptive, NULL);
		s_plugin = INIT_SCHED_PLUGIN;
	}
	return &s_plugin;
}
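
The initializer is written to be idempotent: ready_to_use guards the one-time
setup, so repeated calls hand back the same plugin object. A minimal usage
sketch (the caller here is hypothetical; in this patch the real caller is the
plugin table in kernel/litmus.c shown above):

	sched_plugin_t *p1 = init_adaptive_plugin();
	sched_plugin_t *p2 = init_adaptive_plugin();
	/* p1 == p2 == &s_plugin; the second call skips re-initialization
	 * because s_plugin.ready_to_use is already 1 */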