author     Bjoern Brandenburg <bbb@mpi-sws.org>  2016-03-16 08:01:32 -0400
committer  Bjoern Brandenburg <bbb@mpi-sws.org>  2016-03-20 14:30:37 -0400
commit     dbf173a2fbe2abe9a5ee149390705f18c2b17f25 (patch)
tree       4f5b6105e2a5e25fa6e30776215191b810ddf78f /litmus
parent     2a38056cc098c56a04bbe18f4e752f4fa782599f (diff)
Add partitioned reservation-based scheduler plugin (P-RES)
A simple partitioned scheduler that provides a reservation environment on each core, based on the generic reservations code. Hierarchical scheduling is not supported in this version.
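For context (not part of this patch): under P-RES, userspace first creates a per-CPU reservation through the new reservation_create plugin hook, and then attaches a task by admitting it with its task_params.cpu field set to the reservation ID, which pres_admit_task() resolves via sup_find_by_id(). The rough sketch below illustrates that flow from userspace; it assumes liblitmus wrappers (reservation_create(), init_rt_task_param(), set_rt_task_param(), be_migrate_to_cpu(), task_mode()) and the reservation_config field names from litmus/rt_param.h, so the exact names and constants may differ from the actual userspace API.

/* Hedged usage sketch only -- not part of this patch. Assumes liblitmus
 * wrappers and field names; consult litmus/rt_param.h and liblitmus for
 * the authoritative interface. */
#include <stdio.h>
#include <unistd.h>
#include <litmus.h>	/* liblitmus */

#define RES_ID	123	/* arbitrary reservation ID (hypothetical choice) */
#define RES_CPU	1	/* partition/core the reservation lives on */

int main(void)
{
	struct reservation_config config = {0};
	struct rt_task param;

	init_litmus();

	/* 1) Create a periodic polling reservation on RES_CPU; in the kernel
	 *    this request ends up in pres_reservation_create() below. */
	config.id = RES_ID;
	config.cpu = RES_CPU;
	config.priority = LITMUS_LOWEST_PRIORITY;	/* assumed constant */
	config.polling_params.budget = ms2ns(10);	/* 10 ms budget   */
	config.polling_params.period = ms2ns(100);	/* 100 ms period  */
	config.polling_params.relative_deadline = 0;	/* implicit deadline */
	config.polling_params.offset = 0;
	if (reservation_create(PERIODIC_POLLING, &config) < 0)
		perror("reservation_create");

	/* 2) Attach this process. pres_admit_task() looks up the reservation
	 *    on the task's current CPU and interprets task_params.cpu as the
	 *    reservation ID, so migrate there first and pass RES_ID as "cpu". */
	be_migrate_to_cpu(RES_CPU);
	init_rt_task_param(&param);
	param.exec_cost = ms2ns(10);
	param.period = ms2ns(100);
	param.cpu = RES_ID;
	set_rt_task_param(getpid(), &param);
	task_mode(LITMUS_RT_TASK);

	/* ... periodic real-time work, e.g. a loop around sleep_next_period() ... */

	task_mode(BACKGROUND_TASK);
	return 0;
}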
Diffstat (limited to 'litmus')
-rw-r--r--   litmus/Makefile        2
-rw-r--r--   litmus/sched_pres.c  578
2 files changed, 580 insertions, 0 deletions
diff --git a/litmus/Makefile b/litmus/Makefile
index c969ce59db67..ecaa28dc68ad 100644
--- a/litmus/Makefile
+++ b/litmus/Makefile
@@ -31,4 +31,6 @@ obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o
 obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o
 obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o
 
+obj-y += sched_pres.o
+
 obj-y += reservations/
diff --git a/litmus/sched_pres.c b/litmus/sched_pres.c
new file mode 100644
index 000000000000..5992c55ee737
--- /dev/null
+++ b/litmus/sched_pres.c
@@ -0,0 +1,578 @@
+#include <linux/percpu.h>
+#include <linux/slab.h>
+#include <asm/uaccess.h>
+
+#include <litmus/sched_plugin.h>
+#include <litmus/preempt.h>
+#include <litmus/debug_trace.h>
+
+#include <litmus/litmus.h>
+#include <litmus/jobs.h>
+#include <litmus/budget.h>
+#include <litmus/litmus_proc.h>
+#include <litmus/sched_trace.h>
+
+#include <litmus/reservations/reservation.h>
+#include <litmus/reservations/alloc.h>
+
+struct pres_task_state {
+	struct reservation_client *client;
+	int cpu;
+	struct task_client res_info;
+};
+
+struct pres_cpu_state {
+	raw_spinlock_t lock;
+
+	struct sup_reservation_environment sup_env;
+	struct hrtimer timer;
+
+	int cpu;
+	struct task_struct* scheduled;
+};
+
+static DEFINE_PER_CPU(struct pres_cpu_state, pres_cpu_state);
+
+#define cpu_state_for(cpu_id)	(&per_cpu(pres_cpu_state, cpu_id))
+#define local_cpu_state()	(this_cpu_ptr(&pres_cpu_state))
+
+static struct pres_task_state* get_pres_state(struct task_struct *tsk)
+{
+	return (struct pres_task_state*) tsk_rt(tsk)->plugin_state;
+}
+
+static void task_departs(struct task_struct *tsk, int job_complete)
+{
+	struct pres_task_state* state = get_pres_state(tsk);
+	struct reservation* res;
+	struct reservation_client *client;
+
+	client = state->client;
+	res = client->reservation;
+
+	res->ops->client_departs(res, client, job_complete);
+	TRACE_TASK(tsk, "client_departs: removed from reservation R%d\n", res->id);
+}
+
+static void task_arrives(struct task_struct *tsk)
+{
+	struct pres_task_state* state = get_pres_state(tsk);
+	struct reservation* res;
+	struct reservation_client *client;
+
+	client = state->client;
+	res = client->reservation;
+
+	res->ops->client_arrives(res, client);
+	TRACE_TASK(tsk, "client_arrives: added to reservation R%d\n", res->id);
+}
+
+/* NOTE: drops state->lock */
+static void pres_update_timer_and_unlock(struct pres_cpu_state *state)
+{
+	int local;
+	lt_t update, now;
+
+	update = state->sup_env.next_scheduler_update;
+	now = state->sup_env.env.current_time;
+
+	/* Be sure we're actually running on the right core, as
+	 * pres_update_timer_and_unlock() is also called from pres_task_resume(),
+	 * which might be called on any CPU when a thread resumes.
+	 */
+	local = local_cpu_state() == state;
+
+	/* Must drop state lock before calling into hrtimer_start(), which
+	 * may raise a softirq, which in turn may wake ksoftirqd. */
+	raw_spin_unlock(&state->lock);
+
+	if (update <= now) {
+		litmus_reschedule(state->cpu);
+	} else if (likely(local && update != SUP_NO_SCHEDULER_UPDATE)) {
+		/* Reprogram only if not already set correctly. */
+		if (!hrtimer_active(&state->timer) ||
+		    ktime_to_ns(hrtimer_get_expires(&state->timer)) != update) {
+			TRACE("canceling timer...\n");
+			hrtimer_cancel(&state->timer);
+			TRACE("setting scheduler timer for %llu\n", update);
+			/* We cannot use hrtimer_start() here because the
+			 * wakeup flag must be set to zero. */
+			__hrtimer_start_range_ns(&state->timer,
+					ns_to_ktime(update),
+					0 /* timer coalescing slack */,
+					HRTIMER_MODE_ABS_PINNED,
+					0 /* wakeup */);
+			if (update < litmus_clock()) {
+				/* uh oh, timer expired while trying to set it */
+				TRACE("timer expired during setting "
+				      "update:%llu now:%llu actual:%llu\n",
+				      update, now, litmus_clock());
+				/* The timer HW may not have been reprogrammed
+				 * correctly; force rescheduling now. */
+				litmus_reschedule(state->cpu);
+			}
+		}
+	} else if (unlikely(!local && update != SUP_NO_SCHEDULER_UPDATE)) {
+		/* Poke remote core only if timer needs to be set earlier than
+		 * it is currently set.
+		 */
+		TRACE("pres_update_timer for remote CPU %d (update=%llu, "
+		      "active:%d, set:%llu)\n",
+			state->cpu,
+			update,
+			hrtimer_active(&state->timer),
+			ktime_to_ns(hrtimer_get_expires(&state->timer)));
+		if (!hrtimer_active(&state->timer) ||
+		    ktime_to_ns(hrtimer_get_expires(&state->timer)) > update) {
+			TRACE("poking CPU %d so that it can update its "
+			      "scheduling timer (active:%d, set:%llu)\n",
+			      state->cpu,
+			      hrtimer_active(&state->timer),
+			      ktime_to_ns(hrtimer_get_expires(&state->timer)));
+			litmus_reschedule(state->cpu);
+		}
+	}
+}
+
+static enum hrtimer_restart on_scheduling_timer(struct hrtimer *timer)
+{
+	unsigned long flags;
+	enum hrtimer_restart restart = HRTIMER_NORESTART;
+	struct pres_cpu_state *state;
+	lt_t update, now;
+
+	state = container_of(timer, struct pres_cpu_state, timer);
+
+	/* The scheduling timer should only fire on the local CPU, because
+	 * otherwise deadlocks via timer_cancel() are possible.
+	 * Note: this does not interfere with dedicated interrupt handling, as
+	 * even under dedicated interrupt handling scheduling timers for
+	 * budget enforcement must occur locally on each CPU.
+	 */
+	BUG_ON(state->cpu != raw_smp_processor_id());
+
+	raw_spin_lock_irqsave(&state->lock, flags);
+	sup_update_time(&state->sup_env, litmus_clock());
+
+	update = state->sup_env.next_scheduler_update;
+	now = state->sup_env.env.current_time;
+
+	TRACE_CUR("on_scheduling_timer at %llu, upd:%llu (for cpu=%d)\n",
+		now, update, state->cpu);
+
+	if (update <= now) {
+		litmus_reschedule_local();
+	} else if (update != SUP_NO_SCHEDULER_UPDATE) {
+		hrtimer_set_expires(timer, ns_to_ktime(update));
+		restart = HRTIMER_RESTART;
+	}
+
+	raw_spin_unlock_irqrestore(&state->lock, flags);
+
+	return restart;
+}
+
+static struct task_struct* pres_schedule(struct task_struct * prev)
+{
+	/* next == NULL means "schedule background work". */
+	struct pres_cpu_state *state = local_cpu_state();
+
+	raw_spin_lock(&state->lock);
+
+	BUG_ON(state->scheduled && state->scheduled != prev);
+	BUG_ON(state->scheduled && !is_realtime(prev));
+
+	/* update time */
+	state->sup_env.will_schedule = true;
+	sup_update_time(&state->sup_env, litmus_clock());
+
+	/* figure out what to schedule next */
+	state->scheduled = sup_dispatch(&state->sup_env);
+
+	/* Notify LITMUS^RT core that we've arrived at a scheduling decision. */
+	sched_state_task_picked();
+
+	/* program scheduler timer */
+	state->sup_env.will_schedule = false;
+	/* NOTE: drops state->lock */
+	pres_update_timer_and_unlock(state);
+
+	if (prev != state->scheduled && is_realtime(prev))
+		TRACE_TASK(prev, "descheduled.\n");
+	if (state->scheduled)
+		TRACE_TASK(state->scheduled, "scheduled.\n");
+
+	return state->scheduled;
+}
+
+static void resume_legacy_task_model_updates(struct task_struct *tsk)
+{
+	lt_t now;
+	if (is_sporadic(tsk)) {
+		/* If this sporadic task was gone for a "long" time and woke up past
+		 * its deadline, then give it a new budget by triggering a job
+		 * release. This is purely cosmetic and has no effect on the
+		 * P-RES scheduler. */
+
+		now = litmus_clock();
+		if (is_tardy(tsk, now))
+			release_at(tsk, now);
+	}
+}
+
+
+/* Called when a task should be removed from the ready queue.
+ */
+static void pres_task_block(struct task_struct *tsk)
+{
+	unsigned long flags;
+	struct pres_task_state* tinfo = get_pres_state(tsk);
+	struct pres_cpu_state *state = cpu_state_for(tinfo->cpu);
+
+	TRACE_TASK(tsk, "thread suspends at %llu (state:%d, running:%d)\n",
+		litmus_clock(), tsk->state, is_current_running());
+
+	raw_spin_lock_irqsave(&state->lock, flags);
+	task_departs(tsk, is_completed(tsk));
+	raw_spin_unlock_irqrestore(&state->lock, flags);
+}
+
+
+/* Called when the state of tsk changes back to TASK_RUNNING.
+ * We need to requeue the task.
+ */
+static void pres_task_resume(struct task_struct *tsk)
+{
+	unsigned long flags;
+	struct pres_task_state* tinfo = get_pres_state(tsk);
+	struct pres_cpu_state *state = cpu_state_for(tinfo->cpu);
+
+	TRACE_TASK(tsk, "thread wakes up at %llu\n", litmus_clock());
+
+	raw_spin_lock_irqsave(&state->lock, flags);
+	/* Assumption: litmus_clock() is synchronized across cores,
+	 * since we might not actually be executing on tinfo->cpu
+	 * at the moment. */
+	sup_update_time(&state->sup_env, litmus_clock());
+	task_arrives(tsk);
+	/* NOTE: drops state->lock */
+	pres_update_timer_and_unlock(state);
+	local_irq_restore(flags);
+
+	resume_legacy_task_model_updates(tsk);
+}
+
+static long pres_admit_task(struct task_struct *tsk)
+{
+	long err = -ESRCH;
+	unsigned long flags;
+	struct reservation *res;
+	struct pres_cpu_state *state;
+	struct pres_task_state *tinfo = kzalloc(sizeof(*tinfo), GFP_ATOMIC);
+
+	if (!tinfo)
+		return -ENOMEM;
+
+	preempt_disable();
+
+	state = cpu_state_for(task_cpu(tsk));
+	raw_spin_lock_irqsave(&state->lock, flags);
+
+	res = sup_find_by_id(&state->sup_env, tsk_rt(tsk)->task_params.cpu);
+
+	/* found the appropriate reservation (or vCPU) */
+	if (res) {
+		task_client_init(&tinfo->res_info, tsk, res);
+		tinfo->cpu = task_cpu(tsk);
+		tinfo->client = &tinfo->res_info.client;
+		tsk_rt(tsk)->plugin_state = tinfo;
+		err = 0;
+
+		/* disable LITMUS^RT's per-thread budget enforcement */
+		tsk_rt(tsk)->task_params.budget_policy = NO_ENFORCEMENT;
+	}
+
+	raw_spin_unlock_irqrestore(&state->lock, flags);
+
+	preempt_enable();
+
+	if (err)
+		kfree(tinfo);
+
+	return err;
+}
+
+static void task_new_legacy_task_model_updates(struct task_struct *tsk)
+{
+	lt_t now = litmus_clock();
+
+	/* the first job exists starting as of right now */
+	release_at(tsk, now);
+	sched_trace_task_release(tsk);
+}
+
+static void pres_task_new(struct task_struct *tsk, int on_runqueue,
+			  int is_running)
+{
+	unsigned long flags;
+	struct pres_task_state* tinfo = get_pres_state(tsk);
+	struct pres_cpu_state *state = cpu_state_for(tinfo->cpu);
+
+	TRACE_TASK(tsk, "new RT task %llu (on_rq:%d, running:%d)\n",
+		   litmus_clock(), on_runqueue, is_running);
+
+	/* acquire the lock protecting the state and disable interrupts */
+	raw_spin_lock_irqsave(&state->lock, flags);
+
+	if (is_running) {
+		state->scheduled = tsk;
+		/* make sure this task should actually be running */
+		litmus_reschedule_local();
+	}
+
+	if (on_runqueue || is_running) {
+		/* Assumption: litmus_clock() is synchronized across cores
+		 * [see comment in pres_task_resume()] */
+		sup_update_time(&state->sup_env, litmus_clock());
+		task_arrives(tsk);
+		/* NOTE: drops state->lock */
+		pres_update_timer_and_unlock(state);
+		local_irq_restore(flags);
+	} else
+		raw_spin_unlock_irqrestore(&state->lock, flags);
+
+	task_new_legacy_task_model_updates(tsk);
+}
+
+static void pres_task_exit(struct task_struct *tsk)
+{
+	unsigned long flags;
+	struct pres_task_state* tinfo = get_pres_state(tsk);
+	struct pres_cpu_state *state = cpu_state_for(tinfo->cpu);
+
+	raw_spin_lock_irqsave(&state->lock, flags);
+
+	TRACE_TASK(tsk, "task exits at %llu (present:%d sched:%d)\n",
+		litmus_clock(), is_present(tsk), state->scheduled == tsk);
+
+	if (state->scheduled == tsk)
+		state->scheduled = NULL;
+
+	/* remove from queues */
+	if (is_present(tsk)) {
+		/* Assumption: litmus_clock() is synchronized across cores
+		 * [see comment in pres_task_resume()] */
+		sup_update_time(&state->sup_env, litmus_clock());
+		task_departs(tsk, 0);
+		/* NOTE: drops state->lock */
+		pres_update_timer_and_unlock(state);
+		local_irq_restore(flags);
+	} else
+		raw_spin_unlock_irqrestore(&state->lock, flags);
+
+	kfree(tsk_rt(tsk)->plugin_state);
+	tsk_rt(tsk)->plugin_state = NULL;
+}
+
+static void pres_current_budget(lt_t *used_so_far, lt_t *remaining)
+{
+	struct pres_task_state *tstate = get_pres_state(current);
+	struct pres_cpu_state *state;
+
+	/* FIXME: protect against concurrent task_exit() */
+
+	local_irq_disable();
+
+	state = cpu_state_for(tstate->cpu);
+
+	raw_spin_lock(&state->lock);
+
+	sup_update_time(&state->sup_env, litmus_clock());
+	if (remaining)
+		*remaining = tstate->client->reservation->cur_budget;
+	if (used_so_far)
+		*used_so_far = tstate->client->reservation->budget_consumed;
+	pres_update_timer_and_unlock(state);
+
+	local_irq_enable();
+}
+
+static long do_pres_reservation_create(
+	int res_type,
+	struct reservation_config *config)
+{
+	struct pres_cpu_state *state;
+	struct reservation* res;
+	struct reservation* new_res = NULL;
+	unsigned long flags;
+	long err;
+
+	/* Allocate before we grab a spin lock. */
+	switch (res_type) {
+	case PERIODIC_POLLING:
+	case SPORADIC_POLLING:
+		err = alloc_polling_reservation(res_type, config, &new_res);
+		break;
+
+	case TABLE_DRIVEN:
+		err = alloc_table_driven_reservation(config, &new_res);
+		break;
+
+	default:
+		err = -EINVAL;
+		break;
+	}
+
+	if (err)
+		return err;
+
+	state = cpu_state_for(config->cpu);
+	raw_spin_lock_irqsave(&state->lock, flags);
+
+	res = sup_find_by_id(&state->sup_env, config->id);
+	if (!res) {
+		sup_add_new_reservation(&state->sup_env, new_res);
+		err = config->id;
+	} else {
+		err = -EEXIST;
+	}
+
+	raw_spin_unlock_irqrestore(&state->lock, flags);
+
+	if (err < 0)
+		kfree(new_res);
+
+	return err;
+}
+
+static long pres_reservation_create(int res_type, void* __user _config)
+{
+	struct reservation_config config;
+
+	TRACE("Attempt to create reservation (%d)\n", res_type);
+
+	if (copy_from_user(&config, _config, sizeof(config)))
+		return -EFAULT;
+
+	if (config.cpu < 0 || !cpu_online(config.cpu)) {
+		printk(KERN_ERR "invalid polling reservation (%u): "
+		       "CPU %d offline\n", config.id, config.cpu);
+		return -EINVAL;
+	}
+
+	return do_pres_reservation_create(res_type, &config);
+}
+
+static struct domain_proc_info pres_domain_proc_info;
+
+static long pres_get_domain_proc_info(struct domain_proc_info **ret)
+{
+	*ret = &pres_domain_proc_info;
+	return 0;
+}
+
+static void pres_setup_domain_proc(void)
+{
+	int i, cpu;
+	int num_rt_cpus = num_online_cpus();
+
+	struct cd_mapping *cpu_map, *domain_map;
+
+	memset(&pres_domain_proc_info, 0, sizeof(pres_domain_proc_info));
+	init_domain_proc_info(&pres_domain_proc_info, num_rt_cpus, num_rt_cpus);
+	pres_domain_proc_info.num_cpus = num_rt_cpus;
+	pres_domain_proc_info.num_domains = num_rt_cpus;
+
+	i = 0;
+	for_each_online_cpu(cpu) {
+		cpu_map = &pres_domain_proc_info.cpu_to_domains[i];
+		domain_map = &pres_domain_proc_info.domain_to_cpus[i];
+
+		cpu_map->id = cpu;
+		domain_map->id = i;
+		cpumask_set_cpu(i, cpu_map->mask);
+		cpumask_set_cpu(cpu, domain_map->mask);
+		++i;
+	}
+}
+
+static long pres_activate_plugin(void)
+{
+	int cpu;
+	struct pres_cpu_state *state;
+
+	for_each_online_cpu(cpu) {
+		TRACE("Initializing CPU%d...\n", cpu);
+
+		state = cpu_state_for(cpu);
+
+		raw_spin_lock_init(&state->lock);
+		state->cpu = cpu;
+		state->scheduled = NULL;
+
+		sup_init(&state->sup_env);
+
+		hrtimer_init(&state->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
+		state->timer.function = on_scheduling_timer;
+	}
+
+	pres_setup_domain_proc();
+
+	return 0;
+}
+
+static long pres_deactivate_plugin(void)
+{
+	int cpu;
+	struct pres_cpu_state *state;
+	struct reservation *res;
+
+	for_each_online_cpu(cpu) {
+		state = cpu_state_for(cpu);
+		raw_spin_lock(&state->lock);
+
+		hrtimer_cancel(&state->timer);
+
+		/* Delete all reservations --- assumes struct reservation
+		 * is prefix of containing struct. */
+
+		while (!list_empty(&state->sup_env.all_reservations)) {
+			res = list_first_entry(
+				&state->sup_env.all_reservations,
+				struct reservation, all_list);
+			list_del(&res->all_list);
+			if (res->ops->shutdown)
+				res->ops->shutdown(res);
+			kfree(res);
+		}
+
+		raw_spin_unlock(&state->lock);
+	}
+
+	destroy_domain_proc_info(&pres_domain_proc_info);
+	return 0;
+}
+
+static struct sched_plugin pres_plugin = {
+	.plugin_name		= "P-RES",
+	.schedule		= pres_schedule,
+	.task_block		= pres_task_block,
+	.task_wake_up		= pres_task_resume,
+	.admit_task		= pres_admit_task,
+	.task_new		= pres_task_new,
+	.task_exit		= pres_task_exit,
+	.complete_job		= complete_job_oneshot,
+	.get_domain_proc_info	= pres_get_domain_proc_info,
+	.activate_plugin	= pres_activate_plugin,
+	.deactivate_plugin	= pres_deactivate_plugin,
+	.reservation_create	= pres_reservation_create,
+	.current_budget		= pres_current_budget,
+};
+
+static int __init init_pres(void)
+{
+	return register_sched_plugin(&pres_plugin);
+}
+
+module_init(init_pres);
+