author    Bjoern Brandenburg <bbb@mpi-sws.org>    2016-03-16 08:01:32 -0400
committer Bjoern Brandenburg <bbb@mpi-sws.org>    2017-05-26 17:12:42 -0400
commit    653815201ca8002a58458a2204d8410955288a33 (patch)
tree      02d9103ba519db4235128e4c98ea1a115850d77b /litmus/sched_pres.c
parent    55b4b8689a88d6cb457ecfaabbccc09d5f7c121a (diff)
Add P-RES scheduler plugin
P-RES: the partitioned reservation-based scheduler.

A simple partitioned scheduler that provides a reservation environment on
each core, based on the generic reservations code. Hierarchical scheduling
is not supported in this version.

P-RES: trace sporadic wake-ups
P-RES: use inferred_sporadic_job_release_at()
porting fix: add missing header for module_init()
porting fix: adopt new hrtimer API in P-RES plugin
P-RES: add fork() support
P-RES: don't return -ESRCH on reservation lookup failure
P-RES: improve task admission
- Deal with the fact that task_cpu() is not yet updated for currently
  suspended tasks.
- Provide some feedback via printk().
Diffstat (limited to 'litmus/sched_pres.c')
-rw-r--r--  litmus/sched_pres.c | 613
1 file changed, 613 insertions(+), 0 deletions(-)
diff --git a/litmus/sched_pres.c b/litmus/sched_pres.c
new file mode 100644
index 000000000000..f7fe3a31a42b
--- /dev/null
+++ b/litmus/sched_pres.c
@@ -0,0 +1,613 @@
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <asm/uaccess.h>

#include <litmus/sched_plugin.h>
#include <litmus/preempt.h>
#include <litmus/debug_trace.h>

#include <litmus/litmus.h>
#include <litmus/jobs.h>
#include <litmus/budget.h>
#include <litmus/litmus_proc.h>
#include <litmus/sched_trace.h>

#include <litmus/reservations/reservation.h>
#include <litmus/reservations/alloc.h>

struct pres_task_state {
	struct reservation_client *client;
	int cpu;
	struct task_client res_info;
};

struct pres_cpu_state {
	raw_spinlock_t lock;

	struct sup_reservation_environment sup_env;
	struct hrtimer timer;

	int cpu;
	struct task_struct* scheduled;
};

static DEFINE_PER_CPU(struct pres_cpu_state, pres_cpu_state);

#define cpu_state_for(cpu_id)	(&per_cpu(pres_cpu_state, cpu_id))
#define local_cpu_state()	(this_cpu_ptr(&pres_cpu_state))

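/* Look up the P-RES-specific state attached to a real-time task. */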
static struct pres_task_state* get_pres_state(struct task_struct *tsk)
{
	return (struct pres_task_state*) tsk_rt(tsk)->plugin_state;
}

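/* Remove the task's client from its reservation when the task suspends,
 * completes a job, or exits. Caller must hold the corresponding
 * pres_cpu_state lock. */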
static void task_departs(struct task_struct *tsk, int job_complete)
{
	struct pres_task_state* state = get_pres_state(tsk);
	struct reservation* res;
	struct reservation_client *client;

	client = state->client;
	res = client->reservation;

	res->ops->client_departs(res, client, job_complete);
	TRACE_TASK(tsk, "client_departs: removed from reservation R%d\n", res->id);
}

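/* Add the task's client back to its reservation when the task becomes
 * runnable. Caller must hold the corresponding pres_cpu_state lock. */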
static void task_arrives(struct task_struct *tsk)
{
	struct pres_task_state* state = get_pres_state(tsk);
	struct reservation* res;
	struct reservation_client *client;

	client = state->client;
	res = client->reservation;

	res->ops->client_arrives(res, client);
	TRACE_TASK(tsk, "client_arrives: added to reservation R%d\n", res->id);
}

/* NOTE: drops state->lock */
static void pres_update_timer_and_unlock(struct pres_cpu_state *state)
{
	int local;
	lt_t update, now;

	update = state->sup_env.next_scheduler_update;
	now = state->sup_env.env.current_time;

	/* Be sure we're actually running on the right core,
	 * as pres_update_timer() is also called from pres_task_resume(),
	 * which might be called on any CPU when a thread resumes.
	 */
	local = local_cpu_state() == state;

	/* Must drop state lock before calling into hrtimer_start(), which
	 * may raise a softirq, which in turn may wake ksoftirqd. */
	raw_spin_unlock(&state->lock);

	if (update <= now) {
		litmus_reschedule(state->cpu);
	} else if (likely(local && update != SUP_NO_SCHEDULER_UPDATE)) {
		/* Reprogram only if not already set correctly. */
		if (!hrtimer_active(&state->timer) ||
		    ktime_to_ns(hrtimer_get_expires(&state->timer)) != update) {
			TRACE("canceling timer...\n");
			hrtimer_cancel(&state->timer);
			TRACE("setting scheduler timer for %llu\n", update);
			/* Reprogram the timer; HRTIMER_MODE_ABS_PINNED keeps
			 * it pinned to this CPU and interprets the expiry
			 * time as absolute. */
			hrtimer_start(&state->timer,
				ns_to_ktime(update),
				HRTIMER_MODE_ABS_PINNED);
			if (update < litmus_clock()) {
				/* uh oh, timer expired while trying to set it */
				TRACE("timer expired during setting "
				      "update:%llu now:%llu actual:%llu\n",
				      update, now, litmus_clock());
				/* The timer HW may not have been reprogrammed
				 * correctly; force rescheduling now. */
				litmus_reschedule(state->cpu);
			}
		}
	} else if (unlikely(!local && update != SUP_NO_SCHEDULER_UPDATE)) {
		/* Poke remote core only if timer needs to be set earlier than
		 * it is currently set.
		 */
		TRACE("pres_update_timer for remote CPU %d (update=%llu, "
		      "active:%d, set:%llu)\n",
		      state->cpu,
		      update,
		      hrtimer_active(&state->timer),
		      ktime_to_ns(hrtimer_get_expires(&state->timer)));
		if (!hrtimer_active(&state->timer) ||
		    ktime_to_ns(hrtimer_get_expires(&state->timer)) > update) {
			TRACE("poking CPU %d so that it can update its "
			      "scheduling timer (active:%d, set:%llu)\n",
			      state->cpu,
			      hrtimer_active(&state->timer),
			      ktime_to_ns(hrtimer_get_expires(&state->timer)));
			litmus_reschedule(state->cpu);
		}
	}
}

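/* Timer callback: advance the local reservation environment to the current
 * time and either trigger a local reschedule or re-arm the timer for the
 * next scheduler update. */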
static enum hrtimer_restart on_scheduling_timer(struct hrtimer *timer)
{
	unsigned long flags;
	enum hrtimer_restart restart = HRTIMER_NORESTART;
	struct pres_cpu_state *state;
	lt_t update, now;

	state = container_of(timer, struct pres_cpu_state, timer);

	/* The scheduling timer should only fire on the local CPU, because
	 * otherwise deadlocks via timer_cancel() are possible.
	 * Note: this does not interfere with dedicated interrupt handling, as
	 * even under dedicated interrupt handling scheduling timers for
	 * budget enforcement must occur locally on each CPU.
	 */
	BUG_ON(state->cpu != raw_smp_processor_id());

	raw_spin_lock_irqsave(&state->lock, flags);
	sup_update_time(&state->sup_env, litmus_clock());

	update = state->sup_env.next_scheduler_update;
	now = state->sup_env.env.current_time;

	TRACE_CUR("on_scheduling_timer at %llu, upd:%llu (for cpu=%d)\n",
		  now, update, state->cpu);

	if (update <= now) {
		litmus_reschedule_local();
	} else if (update != SUP_NO_SCHEDULER_UPDATE) {
		hrtimer_set_expires(timer, ns_to_ktime(update));
		restart = HRTIMER_RESTART;
	}

	raw_spin_unlock_irqrestore(&state->lock, flags);

	return restart;
}

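/* Main scheduling function: charge elapsed time to the reservation
 * environment, ask sup_dispatch() which client to run next, and reprogram
 * the scheduling timer before returning the chosen task. */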
static struct task_struct* pres_schedule(struct task_struct * prev)
{
	/* next == NULL means "schedule background work". */
	struct pres_cpu_state *state = local_cpu_state();

	raw_spin_lock(&state->lock);

	BUG_ON(state->scheduled && state->scheduled != prev);
	BUG_ON(state->scheduled && !is_realtime(prev));

	/* update time */
	state->sup_env.will_schedule = true;
	sup_update_time(&state->sup_env, litmus_clock());

	/* figure out what to schedule next */
	state->scheduled = sup_dispatch(&state->sup_env);

	/* Notify LITMUS^RT core that we've arrived at a scheduling decision. */
	sched_state_task_picked();

	/* program scheduler timer */
	state->sup_env.will_schedule = false;
	/* NOTE: drops state->lock */
	pres_update_timer_and_unlock(state);

	if (prev != state->scheduled && is_realtime(prev))
		TRACE_TASK(prev, "descheduled.\n");
	if (state->scheduled)
		TRACE_TASK(state->scheduled, "scheduled.\n");

	return state->scheduled;
}

static void resume_legacy_task_model_updates(struct task_struct *tsk)
{
	lt_t now;
	if (is_sporadic(tsk)) {
		/* If this sporadic task was gone for a "long" time and woke up past
		 * its deadline, then give it a new budget by triggering a job
		 * release. This is purely cosmetic and has no effect on the
		 * P-RES scheduler. */

		now = litmus_clock();
		if (is_tardy(tsk, now)) {
			inferred_sporadic_job_release_at(tsk, now);
		}
	}
}


/* Called when a task should be removed from the ready queue.
 */
static void pres_task_block(struct task_struct *tsk)
{
	unsigned long flags;
	struct pres_task_state* tinfo = get_pres_state(tsk);
	struct pres_cpu_state *state = cpu_state_for(tinfo->cpu);

	TRACE_TASK(tsk, "thread suspends at %llu (state:%d, running:%d)\n",
		   litmus_clock(), tsk->state, is_current_running());

	raw_spin_lock_irqsave(&state->lock, flags);
	task_departs(tsk, is_completed(tsk));
	raw_spin_unlock_irqrestore(&state->lock, flags);
}


/* Called when the state of tsk changes back to TASK_RUNNING.
 * We need to requeue the task.
 */
static void pres_task_resume(struct task_struct *tsk)
{
	unsigned long flags;
	struct pres_task_state* tinfo = get_pres_state(tsk);
	struct pres_cpu_state *state = cpu_state_for(tinfo->cpu);

	TRACE_TASK(tsk, "thread wakes up at %llu\n", litmus_clock());

	raw_spin_lock_irqsave(&state->lock, flags);
	/* Assumption: litmus_clock() is synchronized across cores,
	 * since we might not actually be executing on tinfo->cpu
	 * at the moment. */
	sup_update_time(&state->sup_env, litmus_clock());
	task_arrives(tsk);
	/* NOTE: drops state->lock */
	pres_update_timer_and_unlock(state);
	local_irq_restore(flags);

	resume_legacy_task_model_updates(tsk);
}

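/* Admission check: locate the reservation whose ID is given in
 * task_params.cpu on the task's assigned core, attach the task to it as a
 * client, and disable LITMUS^RT's per-thread budget enforcement (the
 * reservation enforces the budget instead). */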
static long pres_admit_task(struct task_struct *tsk)
{
	long err = -EINVAL;
	unsigned long flags;
	struct reservation *res;
	struct pres_cpu_state *state;
	struct pres_task_state *tinfo = kzalloc(sizeof(*tinfo), GFP_ATOMIC);

	if (!tinfo)
		return -ENOMEM;

	preempt_disable();

	/* NOTE: this is obviously racy w.r.t. affinity changes since
	 * we are not holding any runqueue locks. */
	if (tsk->nr_cpus_allowed != 1) {
		printk(KERN_WARNING "%s/%d: task does not have "
		       "singleton affinity mask\n",
		       tsk->comm, tsk->pid);
		state = cpu_state_for(task_cpu(tsk));
	} else {
		state = cpu_state_for(cpumask_first(&tsk->cpus_allowed));
	}

	TRACE_TASK(tsk, "on CPU %d, valid?:%d\n",
		   task_cpu(tsk), cpumask_test_cpu(task_cpu(tsk), &tsk->cpus_allowed));

	raw_spin_lock_irqsave(&state->lock, flags);

	res = sup_find_by_id(&state->sup_env, tsk_rt(tsk)->task_params.cpu);

	/* found the appropriate reservation (or vCPU) */
	if (res) {
		task_client_init(&tinfo->res_info, tsk, res);
		tinfo->cpu = state->cpu;
		tinfo->client = &tinfo->res_info.client;
		tsk_rt(tsk)->plugin_state = tinfo;
		err = 0;

		/* disable LITMUS^RT's per-thread budget enforcement */
		tsk_rt(tsk)->task_params.budget_policy = NO_ENFORCEMENT;
	} else {
		printk(KERN_WARNING "Could not find reservation %d on "
		       "core %d for task %s/%d\n",
		       tsk_rt(tsk)->task_params.cpu, state->cpu,
		       tsk->comm, tsk->pid);
	}

	raw_spin_unlock_irqrestore(&state->lock, flags);

	preempt_enable();

	if (err)
		kfree(tinfo);

	return err;
}

static void task_new_legacy_task_model_updates(struct task_struct *tsk)
{
	lt_t now = litmus_clock();

	/* the first job exists starting as of right now */
	release_at(tsk, now);
	sched_trace_task_release(tsk);
}

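/* Set up a newly admitted real-time task: if it is already runnable or
 * running, add it to its reservation and update the scheduling timer. */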
static void pres_task_new(struct task_struct *tsk, int on_runqueue,
			  int is_running)
{
	unsigned long flags;
	struct pres_task_state* tinfo = get_pres_state(tsk);
	struct pres_cpu_state *state = cpu_state_for(tinfo->cpu);

	TRACE_TASK(tsk, "new RT task %llu (on_rq:%d, running:%d)\n",
		   litmus_clock(), on_runqueue, is_running);

	/* acquire the lock protecting the state and disable interrupts */
	raw_spin_lock_irqsave(&state->lock, flags);

	if (is_running) {
		state->scheduled = tsk;
		/* make sure this task should actually be running */
		litmus_reschedule_local();
	}

	if (on_runqueue || is_running) {
		/* Assumption: litmus_clock() is synchronized across cores
		 * [see comment in pres_task_resume()] */
		sup_update_time(&state->sup_env, litmus_clock());
		task_arrives(tsk);
		/* NOTE: drops state->lock */
		pres_update_timer_and_unlock(state);
		local_irq_restore(flags);
	} else
		raw_spin_unlock_irqrestore(&state->lock, flags);

	task_new_legacy_task_model_updates(tsk);
}

static bool pres_fork_task(struct task_struct *tsk)
{
	TRACE_CUR("is forking\n");
	TRACE_TASK(tsk, "forked child rt:%d cpu:%d task_cpu:%d "
		   "wcet:%llu per:%llu\n",
		   is_realtime(tsk),
		   tsk_rt(tsk)->task_params.cpu,
		   task_cpu(tsk),
		   tsk_rt(tsk)->task_params.exec_cost,
		   tsk_rt(tsk)->task_params.period);

	/* We always allow forking. */
	/* The newly forked task will be in the same reservation. */
	return true;
}

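/* Tear down a departing real-time task: remove it from its reservation if
 * it is still present and free the per-task plugin state. */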
static void pres_task_exit(struct task_struct *tsk)
{
	unsigned long flags;
	struct pres_task_state* tinfo = get_pres_state(tsk);
	struct pres_cpu_state *state = cpu_state_for(tinfo->cpu);

	raw_spin_lock_irqsave(&state->lock, flags);

	TRACE_TASK(tsk, "task exits at %llu (present:%d sched:%d)\n",
		   litmus_clock(), is_present(tsk), state->scheduled == tsk);

	if (state->scheduled == tsk)
		state->scheduled = NULL;

	/* remove from queues */
	if (is_present(tsk)) {
		/* Assumption: litmus_clock() is synchronized across cores
		 * [see comment in pres_task_resume()] */
		sup_update_time(&state->sup_env, litmus_clock());
		task_departs(tsk, 0);
		/* NOTE: drops state->lock */
		pres_update_timer_and_unlock(state);
		local_irq_restore(flags);
	} else
		raw_spin_unlock_irqrestore(&state->lock, flags);

	kfree(tsk_rt(tsk)->plugin_state);
	tsk_rt(tsk)->plugin_state = NULL;
}

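/* Report how much budget the current task's reservation has consumed and
 * how much remains. */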
static void pres_current_budget(lt_t *used_so_far, lt_t *remaining)
{
	struct pres_task_state *tstate = get_pres_state(current);
	struct pres_cpu_state *state;

	/* FIXME: protect against concurrent task_exit() */

	local_irq_disable();

	state = cpu_state_for(tstate->cpu);

	raw_spin_lock(&state->lock);

	sup_update_time(&state->sup_env, litmus_clock());
	if (remaining)
		*remaining = tstate->client->reservation->cur_budget;
	if (used_so_far)
		*used_so_far = tstate->client->reservation->budget_consumed;
	pres_update_timer_and_unlock(state);

	local_irq_enable();
}

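/* Allocate a reservation of the requested type and register it with the
 * per-CPU reservation environment, unless the ID is already in use. */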
static long do_pres_reservation_create(
	int res_type,
	struct reservation_config *config)
{
	struct pres_cpu_state *state;
	struct reservation* res;
	struct reservation* new_res = NULL;
	unsigned long flags;
	long err;

	/* Allocate before we grab a spin lock. */
	switch (res_type) {
	case PERIODIC_POLLING:
	case SPORADIC_POLLING:
		err = alloc_polling_reservation(res_type, config, &new_res);
		break;

	case TABLE_DRIVEN:
		err = alloc_table_driven_reservation(config, &new_res);
		break;

	default:
		err = -EINVAL;
		break;
	}

	if (err)
		return err;

	state = cpu_state_for(config->cpu);
	raw_spin_lock_irqsave(&state->lock, flags);

	res = sup_find_by_id(&state->sup_env, config->id);
	if (!res) {
		sup_add_new_reservation(&state->sup_env, new_res);
		err = config->id;
	} else {
		err = -EEXIST;
	}

	raw_spin_unlock_irqrestore(&state->lock, flags);

	if (err < 0)
		kfree(new_res);

	return err;
}

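/* Plugin entry point for reservation creation: copy the configuration from
 * user space, check that the target CPU is valid and online, then create
 * the reservation. */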
static long pres_reservation_create(int res_type, void* __user _config)
{
	struct reservation_config config;

	TRACE("Attempt to create reservation (%d)\n", res_type);

	if (copy_from_user(&config, _config, sizeof(config)))
		return -EFAULT;

	if (config.cpu < 0 || !cpu_online(config.cpu)) {
		printk(KERN_ERR "invalid polling reservation (%u): "
		       "CPU %d offline\n", config.id, config.cpu);
		return -EINVAL;
	}

	return do_pres_reservation_create(res_type, &config);
}

static struct domain_proc_info pres_domain_proc_info;

static long pres_get_domain_proc_info(struct domain_proc_info **ret)
{
	*ret = &pres_domain_proc_info;
	return 0;
}

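/* Build the one-to-one CPU <-> scheduling-domain mapping that is exported
 * via /proc. */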
static void pres_setup_domain_proc(void)
{
	int i, cpu;
	int num_rt_cpus = num_online_cpus();

	struct cd_mapping *cpu_map, *domain_map;

	memset(&pres_domain_proc_info, 0, sizeof(pres_domain_proc_info));
	init_domain_proc_info(&pres_domain_proc_info, num_rt_cpus, num_rt_cpus);
	pres_domain_proc_info.num_cpus = num_rt_cpus;
	pres_domain_proc_info.num_domains = num_rt_cpus;

	i = 0;
	for_each_online_cpu(cpu) {
		cpu_map = &pres_domain_proc_info.cpu_to_domains[i];
		domain_map = &pres_domain_proc_info.domain_to_cpus[i];

		cpu_map->id = cpu;
		domain_map->id = i;
		cpumask_set_cpu(i, cpu_map->mask);
		cpumask_set_cpu(cpu, domain_map->mask);
		++i;
	}
}

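/* Plugin activation: initialize per-CPU scheduler state, reservation
 * environments, and scheduling timers for all online CPUs, then set up the
 * /proc domain mapping. */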
static long pres_activate_plugin(void)
{
	int cpu;
	struct pres_cpu_state *state;

	for_each_online_cpu(cpu) {
		TRACE("Initializing CPU%d...\n", cpu);

		state = cpu_state_for(cpu);

		raw_spin_lock_init(&state->lock);
		state->cpu = cpu;
		state->scheduled = NULL;

		sup_init(&state->sup_env);

		hrtimer_init(&state->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
		state->timer.function = on_scheduling_timer;
	}

	pres_setup_domain_proc();

	return 0;
}

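/* Plugin deactivation: cancel each CPU's scheduling timer and tear down all
 * remaining reservations. */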
static long pres_deactivate_plugin(void)
{
	int cpu;
	struct pres_cpu_state *state;
	struct reservation *res;

	for_each_online_cpu(cpu) {
		state = cpu_state_for(cpu);
		raw_spin_lock(&state->lock);

		hrtimer_cancel(&state->timer);

		/* Delete all reservations --- assumes struct reservation
		 * is prefix of containing struct. */

		while (!list_empty(&state->sup_env.all_reservations)) {
			res = list_first_entry(
				&state->sup_env.all_reservations,
				struct reservation, all_list);
			list_del(&res->all_list);
			if (res->ops->shutdown)
				res->ops->shutdown(res);
			kfree(res);
		}

		raw_spin_unlock(&state->lock);
	}

	destroy_domain_proc_info(&pres_domain_proc_info);
	return 0;
}

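/* P-RES plugin interface as registered with the LITMUS^RT core. */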
static struct sched_plugin pres_plugin = {
	.plugin_name = "P-RES",
	.schedule = pres_schedule,
	.task_block = pres_task_block,
	.task_wake_up = pres_task_resume,
	.admit_task = pres_admit_task,
	.task_new = pres_task_new,
	.fork_task = pres_fork_task,
	.task_exit = pres_task_exit,
	.complete_job = complete_job_oneshot,
	.get_domain_proc_info = pres_get_domain_proc_info,
	.activate_plugin = pres_activate_plugin,
	.deactivate_plugin = pres_deactivate_plugin,
	.reservation_create = pres_reservation_create,
	.current_budget = pres_current_budget,
};

static int __init init_pres(void)
{
	return register_sched_plugin(&pres_plugin);
}

module_init(init_pres);