author    Bjoern B. Brandenburg <bbb@cs.unc.edu>  2007-04-15 14:26:17 -0400
committer Bjoern B. Brandenburg <bbb@cs.unc.edu>  2007-04-15 14:26:17 -0400
commit    3771d1e5ab177f6bbd547fcddd5d0a2a484fb590 (patch)
tree      621e2480c257f4c20ec5f78ca0adbb072c8748f0 /kernel
parent    8c1d21b235b1cc6422cbb9d2276612b1bbf63cf8 (diff)
Copied the global EDF scheduler plugin.
This will be the GSN-EDF scheduler.
Diffstat (limited to 'kernel')
 kernel/Makefile        |   3 +-
 kernel/sched_gsn_edf.c | 453 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 455 insertions(+), 1 deletion(-)
diff --git a/kernel/Makefile b/kernel/Makefile
index 5cd2351484..dc4de6314a 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -11,7 +11,8 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
 	    hrtimer.o rwsem.o latency.o nsproxy.o srcu.o \
 	    sched_plugin.o litmus.o sched_trace.o \
 	    edf_common.o fifo_common.o pfair_common.o\
-	    sched_global_edf.o sched_part_edf.o sched_edf_hsb.o sched_pfair.o
+	    sched_global_edf.o sched_part_edf.o sched_edf_hsb.o sched_pfair.o \
+	    sched_gsn_edf.o
 
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
 obj-y += time/
diff --git a/kernel/sched_gsn_edf.c b/kernel/sched_gsn_edf.c
new file mode 100644
index 0000000000..c1b4498431
--- /dev/null
+++ b/kernel/sched_gsn_edf.c
@@ -0,0 +1,453 @@
/*
 * kernel/sched_gsn_edf.c
 *
 * Implementation of the GSN-EDF scheduling algorithm.
 *
 * This version works without using the struct queue. It uses the
 * built-in kernel lists.
 */
9 | |||
10 | #include <linux/percpu.h> | ||
11 | #include <linux/sched.h> | ||
12 | #include <linux/list.h> | ||
13 | |||
14 | #include <linux/litmus.h> | ||
15 | #include <linux/sched_plugin.h> | ||
16 | #include <linux/edf_common.h> | ||
17 | #include <linux/sched_trace.h> | ||
18 | |||
19 | |||
/* cpu_entry_t - maintain the state of the priority of each CPU's
 * current task; this is needed to check for priority inversions.
 */
typedef struct {
        int                     cpu;
        int                     executes_realtime;
        jiffie_t                cur_deadline;
        struct list_head        list;
        atomic_t                will_schedule;
} cpu_entry_t;
DEFINE_PER_CPU(cpu_entry_t, gsnedf_cpu_entries);

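/* A CPU sets its will_schedule flag as soon as it has decided to
 * reschedule; gsnedf_check_resched() tests the flag so that it does not
 * send a redundant reschedule IPI to a CPU that will reschedule anyway.
 */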
#define set_will_schedule() \
        (atomic_set(&__get_cpu_var(gsnedf_cpu_entries).will_schedule, 1))
#define clear_will_schedule() \
        (atomic_set(&__get_cpu_var(gsnedf_cpu_entries).will_schedule, 0))
#define test_will_schedule(cpu) \
        (atomic_read(&per_cpu(gsnedf_cpu_entries, cpu).will_schedule))


/* always acquire the cpu lock as the last lock to avoid deadlocks */
static spinlock_t gsnedf_cpu_lock = SPIN_LOCK_UNLOCKED;
/* the CPUs queue themselves according to priority in here */
static LIST_HEAD(gsnedf_cpu_queue);
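/* Invariant: this queue is sorted by increasing deadline, with CPUs that
 * do not execute real-time jobs at the tail; the last entry is thus the
 * lowest-priority CPU and hence the candidate for preemption.
 */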


static edf_domain_t gsnedf;

#define DUMP(args...) TRACE(args)

/* adjust_cpu_queue - Move the cpu entry to the correct place to maintain
 * order in the cpu queue. Caller must hold the ready write lock.
 */
static void adjust_cpu_queue(int exec_rt, jiffie_t deadline)
{
        struct list_head        *pos;
        cpu_entry_t             *other;
        cpu_entry_t             *entry;

        spin_lock(&gsnedf_cpu_lock);

        entry = &__get_cpu_var(gsnedf_cpu_entries);
        entry->executes_realtime = exec_rt;
        entry->cur_deadline      = deadline;

        /* TODO: move instead of del+reinsert */
        list_del(&entry->list);
        /* if we do not execute real-time jobs we just move
         * to the end of the queue
         */
        if (entry->executes_realtime)
                list_for_each(pos, &gsnedf_cpu_queue) {
                        other = list_entry(pos, cpu_entry_t, list);
                        if (!other->executes_realtime ||
                            time_before_eq(entry->cur_deadline,
                                           other->cur_deadline))
                        {
                                __list_add(&entry->list, pos->prev, pos);
                                goto out;
                        }
                }
        /* if we get this far we have the lowest-priority task */
        list_add_tail(&entry->list, &gsnedf_cpu_queue);

 out:
        spin_unlock(&gsnedf_cpu_lock);
}


/* gsnedf_check_resched - Check whether another CPU needs to reschedule.
 *
 * The function only checks and kicks the last CPU in the queue; once that
 * CPU reschedules, it will kick the next one if necessary, and so on. The
 * caller is responsible for making sure that it is not the last entry or
 * that a reschedule is not necessary.
 *
 * TODO: This function is probably way too trigger-happy. It should only
 *       send IPIs if the other CPU is not going to reschedule anyway. But
 *       that is hard to detect reliably. Too many reschedules hurt
 *       performance but do not cause incorrect schedules.
 */
static int gsnedf_check_resched(edf_domain_t *edf)
{
        cpu_entry_t *last;
        int ret = 0;

        spin_lock(&gsnedf_cpu_lock);

        if (!list_empty(&edf->ready_queue)) {
                last = list_entry(gsnedf_cpu_queue.prev, cpu_entry_t, list);
                if (!last->executes_realtime ||
                    time_before(next_ready(edf)->rt_param.times.deadline,
                                last->cur_deadline))
                {
                        if (smp_processor_id() == last->cpu)
                                set_tsk_need_resched(current);
                        else if (!test_will_schedule(last->cpu))
                                smp_send_reschedule(last->cpu);
                        ret = 1;
                }
        }

        spin_unlock(&gsnedf_cpu_lock);
        return ret;
}


/* gsnedf_scheduler_tick - this function is called for every local timer
 *                         interrupt.
 *
 * It checks whether the current task's budget has expired and, if it has
 * not, whether we need to preempt the task.
 */
static reschedule_check_t gsnedf_scheduler_tick(void)
{
        unsigned long flags;
        struct task_struct *t = current;
        reschedule_check_t want_resched = NO_RESCHED;

        /* expire tasks even if not in real-time mode;
         * this makes sure that at the end of real-time mode
         * no task "runs away forever".
         */
        BUG_ON(is_realtime(t) && t->time_slice > 100000);
        if (is_realtime(t) && (!--t->time_slice)) {
                /* this task has exhausted its budget in this period */
                set_rt_flags(t, RT_F_SLEEP);
                want_resched = FORCE_RESCHED;
                set_will_schedule();
                sched_trace_job_completion(t);
        }
        if (get_rt_mode() == MODE_RT_RUN)
        {
                /* check whether anything is waiting to be released;
                 * this could probably be moved to the global timer
                 * interrupt handler since the state will only change
                 * once per jiffie
                 */
                try_release_pending(&gsnedf);
                if (want_resched != FORCE_RESCHED)
                {
                        read_lock_irqsave(&gsnedf.ready_lock, flags);
                        if (preemption_needed(&gsnedf, t))
                        {
                                want_resched = FORCE_RESCHED;
                                set_will_schedule();
                        }
                        read_unlock_irqrestore(&gsnedf.ready_lock, flags);
                }
        }
        return want_resched;
}

/* gsnedf_schedule - This is the main Global EDF schedule function.
 *
 * Assumes the caller holds the lock for rq and that irqs are disabled.
 * This function only works for indirect switching.
 */
static int gsnedf_schedule(struct task_struct * prev,
                           struct task_struct ** next,
                           runqueue_t * rq)
{
        int             need_deactivate = 1;
        int             rt;
        jiffie_t        deadline;
        unsigned long   flags;

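        /* need_deactivate tracks whether prev still has to be taken off the
         * runqueue; it stays set unless prev is selected to keep running.
         */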
        if (is_realtime(prev) && get_rt_flags(prev) == RT_F_SLEEP)
        {
                DUMP("preparing %d for next period\n", prev->pid);
                prepare_for_next_period(prev);
        }

        if (get_rt_mode() == MODE_RT_RUN) {
                write_lock_irqsave(&gsnedf.ready_lock, flags);

                clear_will_schedule();

                if (is_realtime(prev) && is_released(prev) && is_running(prev)
                    && !preemption_needed(&gsnedf, prev)) {
                        /* Our current task's next job has already been
                         * released and has higher priority than the highest-
                         * priority waiting task; in other words: it is tardy.
                         * We just keep it.
                         */
                        DUMP("prev will be next, already released\n");
                        *next = prev;
                        rt = 1;
                        deadline = prev->rt_param.times.deadline;
                        need_deactivate = 0;
                } else {
                        /* either not yet released, preempted, or non-rt */
                        *next = __take_ready(&gsnedf);
                        if (*next) {
                                /* mark the task as executing on this cpu */
                                set_task_cpu(*next, smp_processor_id());

                                /* stick the task into the runqueue */
                                __activate_task(*next, rq);
                                rt = 1;
                                deadline = (*next)->rt_param.times.deadline;
                        }
                        else
                                rt = deadline = 0;
                }

                adjust_cpu_queue(rt, deadline);

                if (rt) {
                        set_rt_flags(*next, RT_F_RUNNING);
                        gsnedf.check_resched(&gsnedf);
                }
                write_unlock_irqrestore(&gsnedf.ready_lock, flags);
        }

        if (is_realtime(prev) && need_deactivate && prev->array) {
                /* take it out of the run queue */
                deactivate_task(prev, rq);
        }

        /* Don't put prev back into the release queue yet.
         * We first need to actually switch stacks before
         * we can execute it on a different CPU.
         */

        /* in the current implementation nobody cares about the return value */
        return 0;
}


/* gsnedf_finish_switch - we just finished the switch away from prev;
 * it is now safe to requeue the task.
 */
static void gsnedf_finish_switch(struct task_struct *prev)
{
        /*printk(KERN_INFO "gsnedf finish switch for %d\n", prev->pid);*/
        if (get_rt_flags(prev) == RT_F_SLEEP ||
            get_rt_mode() != MODE_RT_RUN) {
                /* this task has expired;
                 * _schedule has already taken care of updating the release
                 * and the deadline. We just need to check whether the task
                 * has been released.
                 */
                if (time_before_eq(prev->rt_param.times.release, jiffies)
                    && get_rt_mode() == MODE_RT_RUN) {
                        /* already released */
                        add_ready(&gsnedf, prev);
                        DUMP("%d goes straight to ready queue\n", prev->pid);
                }
                else
                        /* it has got to wait */
                        add_release(&gsnedf, prev);
        }
        else {
                /* this is a forced preemption,
                 * thus the task stays in the ready queue;
                 * we only need to make it available to others
                 */
                add_ready(&gsnedf, prev);
        }
}


/* Prepare a task for running in RT mode.
 * Enqueues the task into the master queue data structure.
 * Returns -EPERM if the task is not TASK_STOPPED.
 */
static long gsnedf_prepare_task(struct task_struct * t)
{
        TRACE("global edf: prepare task %d\n", t->pid);

        if (t->state == TASK_STOPPED) {
                __setscheduler(t, SCHED_FIFO, MAX_RT_PRIO - 1);

                if (get_rt_mode() == MODE_RT_RUN)
                        /* real-time mode is already active,
                         * so prepare an immediate release
                         */
                        prepare_new_release(t);
                /* The task should be running in the queue, otherwise signal
                 * code will try to wake it up with fatal consequences.
                 */
                t->state = TASK_RUNNING;
                add_release(&gsnedf, t);
                return 0;
        }
        else
                return -EPERM;
}

static void gsnedf_wake_up_task(struct task_struct *task)
{
        /* We must determine whether the task should go into the release
         * queue or into the ready queue. It may enter the ready queue
         * if it has credit left in its time slice and has not yet reached
         * its deadline. If it is now past its deadline, we assume this is
         * the arrival of a new sporadic job and thus put it in the ready
         * queue anyway. If it has zero budget and the next release is in
         * the future, it has to go to the release queue.
         */
        TRACE("global edf: wake up %d with budget=%d\n",
              task->pid, task->time_slice);
        task->state = TASK_RUNNING;
        if (is_tardy(task)) {
                /* new sporadic release */
                prepare_new_release(task);
                sched_trace_job_release(task);
                add_ready(&gsnedf, task);
        }
        else if (task->time_slice) {
                /* came back in time before its deadline;
                 * TODO: clip the budget to fit into the period, otherwise it
                 * could cause a deadline overrun in the next period, i.e.,
                 * over-allocation in the next period.
                 */
                set_rt_flags(task, RT_F_RUNNING);
                add_ready(&gsnedf, task);
        }
        else {
                add_release(&gsnedf, task);
        }
}

static void gsnedf_task_blocks(struct task_struct *t)
{
        BUG_ON(!is_realtime(t));
        /* not really anything to do since it can only block if
         * it is running, and when it is not running it is not in any
         * queue anyway
         *
         * TODO: Check whether the assumption is correct for SIGKILL and
         *       SIGSTOP.
         */
        TRACE("task %d blocks with budget=%d\n", t->pid, t->time_slice);
        BUG_ON(t->rt_list.next != LIST_POISON1);
        BUG_ON(t->rt_list.prev != LIST_POISON2);
}


/* When _tear_down is called, the task should not be in any queue any more,
 * as it must have blocked first. We don't keep any internal state for the
 * task; it is all in the task_struct.
 */
static long gsnedf_tear_down(struct task_struct * t)
{
        BUG_ON(!is_realtime(t));
        TRACE("global edf: tear down called for %d\n", t->pid);
        BUG_ON(t->array);
        BUG_ON(t->rt_list.next != LIST_POISON1);
        BUG_ON(t->rt_list.prev != LIST_POISON2);
        return 0;
}


static int gsnedf_mode_change(int new_mode)
{
        int cpu;
        cpu_entry_t *entry;

        /* printk(KERN_INFO "[%d] global edf: mode changed to %d\n",
           smp_processor_id(), new_mode); */
        if (new_mode == MODE_RT_RUN) {
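                /* the first releases are scheduled a few jiffies into the
                 * future; presumably this gives all CPUs time to complete
                 * the mode change before the first jobs are released
                 */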
                prepare_new_releases(&gsnedf, jiffies + 10);

                /* initialize per-CPU state;
                 * we can't do this at boot time because we don't know
                 * which CPUs will be online and we can't put non-existent
                 * CPUs into the queue
                 */
                spin_lock(&gsnedf_cpu_lock);
                /* get old cruft out of the way in case we re-enter
                 * real-time mode for a second time
                 */
                while (!list_empty(&gsnedf_cpu_queue))
                        list_del(gsnedf_cpu_queue.next);
                /* reinitialize */
                for_each_online_cpu(cpu) {
                        entry = &per_cpu(gsnedf_cpu_entries, cpu);
                        atomic_set(&entry->will_schedule, 0);
                        entry->executes_realtime = 0;
                        entry->cur_deadline      = 0;
                        entry->cpu               = cpu;
                        list_add(&entry->list, &gsnedf_cpu_queue);
                }
                spin_unlock(&gsnedf_cpu_lock);
        }
        /* printk(KERN_INFO "[%d] global edf: mode change done\n",
           smp_processor_id()); */
        return 0;
}


/* Plugin object */
static sched_plugin_t s_plugin __cacheline_aligned_in_smp = {
        .ready_to_use = 0
};


/*
 * Plugin initialization code.
 */
#define INIT_SCHED_PLUGIN (struct sched_plugin){ \
        .plugin_name            = "GSN-EDF", \
        .ready_to_use           = 1, \
        .algo_scheduler_tick    = gsnedf_scheduler_tick, \
        .scheduler_tick         = rt_scheduler_tick, \
        .prepare_task           = gsnedf_prepare_task, \
        .sleep_next_period      = edf_sleep_next_period, \
        .tear_down              = gsnedf_tear_down, \
        .shutdown_hook          = 0, \
        .schedule               = gsnedf_schedule, \
        .finish_switch          = gsnedf_finish_switch, \
        .mode_change            = gsnedf_mode_change, \
        .wake_up_task           = gsnedf_wake_up_task, \
        .task_blocks            = gsnedf_task_blocks \
}


sched_plugin_t *__init init_gsn_edf_plugin(void)
{
        if (!s_plugin.ready_to_use)
        {
                set_sched_options(SCHED_NONE);
                edf_domain_init(&gsnedf, gsnedf_check_resched);
                s_plugin = INIT_SCHED_PLUGIN;
        }
        return &s_plugin;
}