path: root/litmus/litmus.c
author    Bjoern Brandenburg <bbb@mpi-sws.org>  2015-08-09 07:18:48 -0400
committer Bjoern Brandenburg <bbb@mpi-sws.org>  2015-08-09 06:21:18 -0400
commit    8e048c798adaabef530a1526f7ce8c6c3cd3475e (patch)
tree      5a96b3eaeaafecec1bf08ba71a9d0084d39d46eb  /litmus/litmus.c
parent    bd175e94795774908317a861a883761b75750e35 (diff)
Add LITMUS^RT core implementation
This patch adds the core of LITMUS^RT:

 - library functionality (heaps, rt_domain, prioritization, etc.)
 - budget enforcement logic
 - job management
 - system call backends
 - virtual devices (control page, etc.)
 - scheduler plugin API (and dummy plugin)

This code compiles, but is not yet integrated with the rest of Linux.
Diffstat (limited to 'litmus/litmus.c')
-rw-r--r--  litmus/litmus.c  681
1 file changed, 681 insertions, 0 deletions
diff --git a/litmus/litmus.c b/litmus/litmus.c
new file mode 100644
index 000000000000..703360c68609
--- /dev/null
+++ b/litmus/litmus.c
@@ -0,0 +1,681 @@
/*
 * litmus.c -- Implementation of the LITMUS syscalls,
 *             the LITMUS initialization code,
 *             and the procfs interface.
 */
#include <asm/uaccess.h>
#include <linux/uaccess.h>
#include <linux/sysrq.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/reboot.h>
#include <linux/stop_machine.h>
#include <linux/sched/rt.h>
#include <linux/rwsem.h>
#include <linux/interrupt.h>

#include <litmus/litmus.h>
#include <litmus/bheap.h>
#include <litmus/trace.h>
#include <litmus/rt_domain.h>
#include <litmus/litmus_proc.h>
#include <litmus/sched_trace.h>

#ifdef CONFIG_SCHED_CPU_AFFINITY
#include <litmus/affinity.h>
#endif

/* Number of RT tasks that exist in the system */
atomic_t rt_task_count = ATOMIC_INIT(0);

#ifdef CONFIG_RELEASE_MASTER
/* current master CPU for handling timer IRQs */
atomic_t release_master_cpu = ATOMIC_INIT(NO_CPU);
#endif

static struct kmem_cache * bheap_node_cache;
extern struct kmem_cache * release_heap_cache;

struct bheap_node* bheap_node_alloc(int gfp_flags)
{
	return kmem_cache_alloc(bheap_node_cache, gfp_flags);
}

void bheap_node_free(struct bheap_node* hn)
{
	kmem_cache_free(bheap_node_cache, hn);
}

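/* Allocation helpers for the per-task release heap; the backing
 * release_heap_cache is defined elsewhere (declared extern above). */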
struct release_heap* release_heap_alloc(int gfp_flags);
void release_heap_free(struct release_heap* rh);

/**
 * Get the quantum alignment as a cmdline option.
 * Default is staggered quanta, as this results in lower overheads.
 */
static bool aligned_quanta = 0;
module_param(aligned_quanta, bool, 0644);
u64 cpu_stagger_offset(int cpu)
{
	u64 offset = 0;

	if (!aligned_quanta) {
		offset = LITMUS_QUANTUM_LENGTH_NS;
		do_div(offset, num_possible_cpus());
		offset *= cpu;
	}
	return offset;
}

/*
 * sys_set_rt_task_param
 * @pid: Pid of the task whose scheduling parameters are to be changed
 * @param: New real-time extension parameters such as the execution cost and
 *         period
 * Syscall for manipulating a task's RT extension params
 * Returns EINVAL if param is NULL or pid is negative, or if the period,
 *                execution cost, class, or budget policy is invalid
 *         EFAULT if copying param from user space fails
 *         ESRCH  if pid does not correspond to a valid task
 *         EBUSY  if the task is already a real-time task
 *         0      on success
 *
 * Only non-real-time tasks may be configured with this system call
 * to avoid races with the scheduler. In practice, this means that a
 * task's parameters must be set _before_ calling sys_prepare_rt_task()
 *
 * find_task_by_vpid() assumes that we are in the same namespace as the
 * target.
 */
asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param)
{
	struct rt_task tp;
	struct task_struct *target;
	int retval = -EINVAL;

	printk("Setting up rt task parameters for process %d.\n", pid);

	if (pid < 0 || param == 0) {
		goto out;
	}
	if (copy_from_user(&tp, param, sizeof(tp))) {
		retval = -EFAULT;
		goto out;
	}

	/* Task search and manipulation must be protected */
	read_lock_irq(&tasklist_lock);
	rcu_read_lock();
	if (!(target = find_task_by_vpid(pid))) {
		retval = -ESRCH;
		rcu_read_unlock();
		goto out_unlock;
	}
	rcu_read_unlock();

	if (is_realtime(target)) {
		/* The task is already a real-time task.
		 * We cannot allow parameter changes at this point.
		 */
		retval = -EBUSY;
		goto out_unlock;
	}

	/* set relative deadline to be implicit if left unspecified */
	if (tp.relative_deadline == 0)
		tp.relative_deadline = tp.period;

	if (tp.exec_cost <= 0)
		goto out_unlock;
	if (tp.period <= 0)
		goto out_unlock;
	if (min(tp.relative_deadline, tp.period) < tp.exec_cost) /* density check */
	{
		printk(KERN_INFO "litmus: real-time task %d rejected "
		       "because task density > 1.0\n", pid);
		goto out_unlock;
	}
	if (tp.cls != RT_CLASS_HARD &&
	    tp.cls != RT_CLASS_SOFT &&
	    tp.cls != RT_CLASS_BEST_EFFORT)
	{
		printk(KERN_INFO "litmus: real-time task %d rejected "
		       "because its class is invalid\n", pid);
		goto out_unlock;
	}
	if (tp.budget_policy != NO_ENFORCEMENT &&
	    tp.budget_policy != QUANTUM_ENFORCEMENT &&
	    tp.budget_policy != PRECISE_ENFORCEMENT)
	{
		printk(KERN_INFO "litmus: real-time task %d rejected "
		       "because unsupported budget enforcement policy "
		       "specified (%d)\n",
		       pid, tp.budget_policy);
		goto out_unlock;
	}

	target->rt_param.task_params = tp;

	retval = 0;
out_unlock:
	read_unlock_irq(&tasklist_lock);
out:
	return retval;
}

/*
 * Getter of task's RT params
 *   returns EINVAL if param is NULL or pid is negative
 *   returns ESRCH  if pid does not correspond to a valid task
 *   returns EFAULT if copying of parameters has failed.
 *
 * find_task_by_vpid() assumes that we are in the same namespace as the
 * target.
 */
asmlinkage long sys_get_rt_task_param(pid_t pid, struct rt_task __user * param)
{
	int retval = -EINVAL;
	struct task_struct *source;
	struct rt_task lp;
	if (param == 0 || pid < 0)
		goto out;
	read_lock(&tasklist_lock);
	if (!(source = find_task_by_vpid(pid))) {
		retval = -ESRCH;
		goto out_unlock;
	}
	lp = source->rt_param.task_params;
	read_unlock(&tasklist_lock);
	/* Do copying outside the lock */
	retval = copy_to_user(param, &lp, sizeof(lp)) ? -EFAULT : 0;
	return retval;
out_unlock:
	read_unlock(&tasklist_lock);
out:
	return retval;
}

/*
 * This is the crucial function for periodic task implementation.
 * It checks if a task is periodic, checks if such kind of sleep
 * is permitted and calls plugin-specific sleep, which puts the
 * task into a wait array.
 *    returns 0 on successful wakeup
 *    returns EPERM if current conditions do not permit such sleep
 *    returns EINVAL if current task is not able to go to sleep
 */
asmlinkage long sys_complete_job(void)
{
	int retval = -EPERM;
	if (!is_realtime(current)) {
		retval = -EINVAL;
		goto out;
	}
	/* Task with negative or zero period cannot sleep */
	if (get_rt_period(current) <= 0) {
		retval = -EINVAL;
		goto out;
	}
	/* The plugin has to put the task into an
	 * appropriate queue and call schedule
	 */
	retval = litmus->complete_job();
out:
	return retval;
}

/* This is an "improved" version of sys_complete_job that
 * addresses the problem of unintentionally missing a job after
 * an overrun.
 *
 *    returns 0 on successful wakeup
 *    returns EPERM if current conditions do not permit such sleep
 *    returns EINVAL if current task is not able to go to sleep
 */
asmlinkage long sys_wait_for_job_release(unsigned int job)
{
	int retval = -EPERM;
	if (!is_realtime(current)) {
		retval = -EINVAL;
		goto out;
	}

	/* Task with negative or zero period cannot sleep */
	if (get_rt_period(current) <= 0) {
		retval = -EINVAL;
		goto out;
	}

	retval = 0;

	/* first wait until we have "reached" the desired job
	 *
	 * This implementation has at least two problems:
	 *
	 * 1) It doesn't gracefully handle the wrap around of
	 *    job_no. Since LITMUS is a prototype, this is not much
	 *    of a problem right now.
	 *
	 * 2) It is theoretically racy if a job release occurs
	 *    between checking job_no and calling sleep_next_period().
	 *    A proper solution would require adding another callback
	 *    in the plugin structure and testing the condition with
	 *    interrupts disabled.
	 *
	 * FIXME: At least problem 2 should be taken care of eventually.
	 */
	while (!retval && job > current->rt_param.job_params.job_no)
		/* If the last job overran then job <= job_no and we
		 * don't send the task to sleep.
		 */
		retval = litmus->complete_job();
out:
	return retval;
}

/* This is a helper syscall to query the current job sequence number.
 *
 *    returns 0 on successful query
 *    returns EPERM if task is not a real-time task.
 *    returns EFAULT if &job is not a valid pointer.
 */
asmlinkage long sys_query_job_no(unsigned int __user *job)
{
	int retval = -EPERM;
	if (is_realtime(current))
		retval = put_user(current->rt_param.job_params.job_no, job);

	return retval;
}

/* sys_null_call() is only used for determining raw system call
 * overheads (kernel entry, kernel exit). It has no useful side effects.
 * If ts is non-NULL, then the current Feather-Trace time is recorded.
 */
asmlinkage long sys_null_call(cycles_t __user *ts)
{
	long ret = 0;
	cycles_t now;

	if (ts) {
		now = get_cycles();
		ret = put_user(now, ts);
	}

	return ret;
}

/* p is a real-time task. Re-init its state as a best-effort task. */
static void reinit_litmus_state(struct task_struct* p, int restore)
{
	struct rt_task user_config = {};
	void* ctrl_page = NULL;

	if (restore) {
		/* Save user-space provided configuration data
		 * and allocated page. */
		user_config = p->rt_param.task_params;
		ctrl_page = p->rt_param.ctrl_page;
	}

	/* We probably should not be inheriting any task's priority
	 * at this point in time.
	 */
	WARN_ON(p->rt_param.inh_task);

	/* Cleanup everything else. */
	memset(&p->rt_param, 0, sizeof(p->rt_param));

	/* Restore preserved fields. */
	if (restore) {
		p->rt_param.task_params = user_config;
		p->rt_param.ctrl_page = ctrl_page;
	}
}

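/* Transition tsk into a LITMUS^RT real-time task: validate its parameters,
 * allocate the per-task heap nodes, and ask the active plugin whether it
 * accepts the task. On failure, the allocations are released again.
 */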
long litmus_admit_task(struct task_struct* tsk)
{
	long retval = 0;

	BUG_ON(is_realtime(tsk));

	tsk_rt(tsk)->heap_node = NULL;
	tsk_rt(tsk)->rel_heap = NULL;

	if (get_rt_relative_deadline(tsk) == 0 ||
	    get_exec_cost(tsk) >
	    min(get_rt_relative_deadline(tsk), get_rt_period(tsk))) {
		TRACE_TASK(tsk,
			   "litmus admit: invalid task parameters "
			   "(e = %lu, p = %lu, d = %lu)\n",
			   get_exec_cost(tsk), get_rt_period(tsk),
			   get_rt_relative_deadline(tsk));
		retval = -EINVAL;
		goto out;
	}

	INIT_LIST_HEAD(&tsk_rt(tsk)->list);

	/* allocate heap node for this task */
	tsk_rt(tsk)->heap_node = bheap_node_alloc(GFP_ATOMIC);
	tsk_rt(tsk)->rel_heap = release_heap_alloc(GFP_ATOMIC);

	if (!tsk_rt(tsk)->heap_node || !tsk_rt(tsk)->rel_heap) {
		printk(KERN_WARNING "litmus: no more heap node memory!?\n");

		retval = -ENOMEM;
		goto out;
	} else {
		bheap_node_init(&tsk_rt(tsk)->heap_node, tsk);
	}

	preempt_disable();

	retval = litmus->admit_task(tsk);

	if (!retval) {
		sched_trace_task_name(tsk);
		sched_trace_task_param(tsk);
		atomic_inc(&rt_task_count);
	}

	preempt_enable();

out:
	if (retval) {
		if (tsk_rt(tsk)->heap_node)
			bheap_node_free(tsk_rt(tsk)->heap_node);
		if (tsk_rt(tsk)->rel_heap)
			release_heap_free(tsk_rt(tsk)->rel_heap);
	}
	return retval;
}

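/* Undo litmus_admit_task(): release the per-task heap nodes, decrement the
 * RT task count, and reset the task's LITMUS^RT state.
 */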
void litmus_clear_state(struct task_struct* tsk)
{
	BUG_ON(bheap_node_in_heap(tsk_rt(tsk)->heap_node));
	bheap_node_free(tsk_rt(tsk)->heap_node);
	release_heap_free(tsk_rt(tsk)->rel_heap);

	atomic_dec(&rt_task_count);
	reinit_litmus_state(tsk, 1);
}

/* called from sched_setscheduler() */
void litmus_exit_task(struct task_struct* tsk)
{
	if (is_realtime(tsk)) {
		sched_trace_task_completion(tsk, 1);

		litmus->task_exit(tsk);
	}
}

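/* Plugin switches are serialized by an rwsem: code paths that must not race
 * with a plugin switch hold it for reading via the disable/enable helpers
 * below, while switch_sched_plugin() takes it for writing.
 */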
static DECLARE_RWSEM(plugin_switch_mutex);

void litmus_plugin_switch_disable(void)
{
	down_read(&plugin_switch_mutex);
}

void litmus_plugin_switch_enable(void)
{
	up_read(&plugin_switch_mutex);
}

static int __do_plugin_switch(struct sched_plugin* plugin)
{
	int ret;

	/* don't switch if there are active real-time tasks */
	if (atomic_read(&rt_task_count) == 0) {
		TRACE("deactivating plugin %s\n", litmus->plugin_name);
		ret = litmus->deactivate_plugin();
		if (0 != ret)
			goto out;

		TRACE("activating plugin %s\n", plugin->plugin_name);
		ret = plugin->activate_plugin();
		if (0 != ret) {
			printk(KERN_INFO "Can't activate %s (%d).\n",
			       plugin->plugin_name, ret);
			plugin = &linux_sched_plugin;
		}

		printk(KERN_INFO "Switching to LITMUS^RT plugin %s.\n", plugin->plugin_name);
		litmus = plugin;
	} else
		ret = -EBUSY;
out:
	TRACE("do_plugin_switch() => %d\n", ret);
	return ret;
}

static atomic_t ready_to_switch;

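/* Runs on every online CPU via stop_cpus(). Each CPU disables interrupts and
 * decrements ready_to_switch; the last CPU to arrive performs the actual
 * switch in __do_plugin_switch() and then releases the others by setting the
 * counter to INT_MAX, on which all CPUs spin before re-enabling interrupts.
 */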
static int do_plugin_switch(void *_plugin)
{
	unsigned long flags;
	int ret = 0;

	local_save_flags(flags);
	local_irq_disable();
	hard_irq_disable();

	if (atomic_dec_and_test(&ready_to_switch))
	{
		ret = __do_plugin_switch((struct sched_plugin*) _plugin);
		atomic_set(&ready_to_switch, INT_MAX);
	}

	do {
		cpu_relax();
	} while (atomic_read(&ready_to_switch) != INT_MAX);

	local_irq_restore(flags);
	return ret;
}

/* Switching a plugin in use is tricky.
 * We must watch out that no real-time tasks exist
 * (and that none are created in parallel) and that the plugin is not
 * currently in use on any processor (in theory).
 */
int switch_sched_plugin(struct sched_plugin* plugin)
{
	int err;
	struct domain_proc_info* domain_info;

	BUG_ON(!plugin);

	if (atomic_read(&rt_task_count) == 0) {
		down_write(&plugin_switch_mutex);

		deactivate_domain_proc();

		get_online_cpus();
		atomic_set(&ready_to_switch, num_online_cpus());
		err = stop_cpus(cpu_online_mask, do_plugin_switch, plugin);
		put_online_cpus();

		if (!litmus->get_domain_proc_info(&domain_info))
			activate_domain_proc(domain_info);

		up_write(&plugin_switch_mutex);
		return err;
	} else
		return -EBUSY;
}

/* Called upon fork.
 * p is the newly forked task.
 */
void litmus_fork(struct task_struct* p)
{
	if (is_realtime(p)) {
		/* clean out any litmus related state, don't preserve anything */
		reinit_litmus_state(p, 0);
		/* Don't let the child be a real-time task. */
		p->sched_reset_on_fork = 1;
	} else
		/* non-rt tasks might have ctrl_page set */
		tsk_rt(p)->ctrl_page = NULL;

	/* od tables are never inherited across a fork */
	p->od_table = NULL;
}

/* Called upon execve().
 * current is doing the exec.
 * Don't let address space specific stuff leak.
 */
void litmus_exec(void)
{
	struct task_struct* p = current;

	if (is_realtime(p)) {
		WARN_ON(p->rt_param.inh_task);
		if (tsk_rt(p)->ctrl_page) {
			free_page((unsigned long) tsk_rt(p)->ctrl_page);
			tsk_rt(p)->ctrl_page = NULL;
		}
	}
}

/* Called when dead_tsk is being deallocated
 */
void exit_litmus(struct task_struct *dead_tsk)
{
	/* We also allow non-RT tasks to
	 * allocate control pages to allow
	 * measurements with non-RT tasks.
	 * So check if we need to free the page
	 * in any case.
	 */
	if (tsk_rt(dead_tsk)->ctrl_page) {
		TRACE_TASK(dead_tsk,
			   "freeing ctrl_page %p\n",
			   tsk_rt(dead_tsk)->ctrl_page);
		free_page((unsigned long) tsk_rt(dead_tsk)->ctrl_page);
	}

	/* Tasks should not be real-time tasks any longer at this point. */
	BUG_ON(is_realtime(dead_tsk));
}

void litmus_do_exit(struct task_struct *exiting_tsk)
{
	/* This task called do_exit(), but is still a real-time task. To avoid
	 * complications later, we force it to be a non-real-time task now. */

	struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };

	TRACE_TASK(exiting_tsk, "exiting, demoted to SCHED_FIFO\n");
	sched_setscheduler_nocheck(exiting_tsk, SCHED_FIFO, &param);
}

void litmus_dealloc(struct task_struct *tsk)
{
	/* tsk is no longer a real-time task */
	TRACE_TASK(tsk, "Deallocating real-time task data\n");
	litmus->task_cleanup(tsk);
	litmus_clear_state(tsk);
}

/* move current non-RT task to a specific CPU */
int litmus_be_migrate_to(int cpu)
{
	struct cpumask single_cpu_aff;

	cpumask_clear(&single_cpu_aff);
	cpumask_set_cpu(cpu, &single_cpu_aff);
	return sched_setaffinity(current->pid, &single_cpu_aff);
}

#ifdef CONFIG_MAGIC_SYSRQ
int sys_kill(int pid, int sig);

static void sysrq_handle_kill_rt_tasks(int key)
{
	struct task_struct *t;
	read_lock(&tasklist_lock);
	for_each_process(t) {
		if (is_realtime(t)) {
			sys_kill(t->pid, SIGKILL);
		}
	}
	read_unlock(&tasklist_lock);
}

static struct sysrq_key_op sysrq_kill_rt_tasks_op = {
	.handler    = sysrq_handle_kill_rt_tasks,
	.help_msg   = "quit-rt-tasks(X)",
	.action_msg = "sent SIGKILL to all LITMUS^RT real-time tasks",
};
#endif

extern struct sched_plugin linux_sched_plugin;

static int litmus_shutdown_nb(struct notifier_block *unused1,
			      unsigned long unused2, void *unused3)
{
	/* Attempt to switch back to regular Linux scheduling.
	 * Forces the active plugin to clean up.
	 */
	if (litmus != &linux_sched_plugin) {
		int ret = switch_sched_plugin(&linux_sched_plugin);
		if (ret) {
			printk("Auto-shutdown of active Litmus plugin failed.\n");
		}
	}
	return NOTIFY_DONE;
}

static struct notifier_block shutdown_notifier = {
	.notifier_call = litmus_shutdown_nb,
};

static int __init _init_litmus(void)
{
	/* Common initializers,
	 * mode change lock is used to enforce single mode change
	 * operation.
	 */
	printk("Starting LITMUS^RT kernel\n");

	register_sched_plugin(&linux_sched_plugin);

	bheap_node_cache = KMEM_CACHE(bheap_node, SLAB_PANIC);
	release_heap_cache = KMEM_CACHE(release_heap, SLAB_PANIC);

#ifdef CONFIG_MAGIC_SYSRQ
	/* offer some debugging help */
	if (!register_sysrq_key('x', &sysrq_kill_rt_tasks_op))
		printk("Registered kill rt tasks magic sysrq.\n");
	else
		printk("Could not register kill rt tasks magic sysrq.\n");
#endif

	init_litmus_proc();

	register_reboot_notifier(&shutdown_notifier);

	return 0;
}

static void _exit_litmus(void)
{
	unregister_reboot_notifier(&shutdown_notifier);

	exit_litmus_proc();
	kmem_cache_destroy(bheap_node_cache);
	kmem_cache_destroy(release_heap_cache);
}

module_init(_init_litmus);
module_exit(_exit_litmus);