path: root/litmus/litmus.c
author    Bjoern Brandenburg <bbb@mpi-sws.org>  2015-08-09 07:18:48 -0400
committer Bjoern Brandenburg <bbb@mpi-sws.org>  2015-08-09 06:21:18 -0400
commit    8e048c798adaabef530a1526f7ce8c6c3cd3475e (patch)
tree      5a96b3eaeaafecec1bf08ba71a9d0084d39d46eb  /litmus/litmus.c
parent    bd175e94795774908317a861a883761b75750e35 (diff)
Add LITMUS^RT core implementation
This patch adds the core of LITMUS^RT:

 - library functionality (heaps, rt_domain, prioritization, etc.)
 - budget enforcement logic
 - job management
 - system call backends
 - virtual devices (control page, etc.)
 - scheduler plugin API (and dummy plugin)

This code compiles, but is not yet integrated with the rest of Linux.
Diffstat (limited to 'litmus/litmus.c')
-rw-r--r--  litmus/litmus.c  681
1 file changed, 681 insertions, 0 deletions
diff --git a/litmus/litmus.c b/litmus/litmus.c
new file mode 100644
index 000000000000..703360c68609
--- /dev/null
+++ b/litmus/litmus.c
@@ -0,0 +1,681 @@
/*
 * litmus.c -- Implementation of the LITMUS syscalls,
 *             the LITMUS initialization code,
 *             and the procfs interface.
 */
#include <asm/uaccess.h>
#include <linux/uaccess.h>
#include <linux/sysrq.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/reboot.h>
#include <linux/stop_machine.h>
#include <linux/sched/rt.h>
#include <linux/rwsem.h>
#include <linux/interrupt.h>

#include <litmus/litmus.h>
#include <litmus/bheap.h>
#include <litmus/trace.h>
#include <litmus/rt_domain.h>
#include <litmus/litmus_proc.h>
#include <litmus/sched_trace.h>

#ifdef CONFIG_SCHED_CPU_AFFINITY
#include <litmus/affinity.h>
#endif

/* Number of RT tasks that exist in the system */
atomic_t rt_task_count = ATOMIC_INIT(0);

#ifdef CONFIG_RELEASE_MASTER
/* current master CPU for handling timer IRQs */
atomic_t release_master_cpu = ATOMIC_INIT(NO_CPU);
#endif

static struct kmem_cache * bheap_node_cache;
extern struct kmem_cache * release_heap_cache;

struct bheap_node* bheap_node_alloc(int gfp_flags)
{
	return kmem_cache_alloc(bheap_node_cache, gfp_flags);
}

void bheap_node_free(struct bheap_node* hn)
{
	kmem_cache_free(bheap_node_cache, hn);
}

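/* Allocation helpers for the per-task release heap; the backing
 * release_heap_cache is defined elsewhere (declared extern above). */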
struct release_heap* release_heap_alloc(int gfp_flags);
void release_heap_free(struct release_heap* rh);

/**
 * Get the quantum alignment as a cmdline option.
 * Default is staggered quanta, as this results in lower overheads.
 */
static bool aligned_quanta = 0;
module_param(aligned_quanta, bool, 0644);
u64 cpu_stagger_offset(int cpu)
{
	u64 offset = 0;

	if (!aligned_quanta) {
		offset = LITMUS_QUANTUM_LENGTH_NS;
		do_div(offset, num_possible_cpus());
		offset *= cpu;
	}
	return offset;
}

/*
 * sys_set_rt_task_param
 * @pid: Pid of the task whose scheduling parameters are to be changed
 * @param: New real-time extension parameters such as the execution cost and
 *         period
 * Syscall for manipulating a task's RT extension params
 * Returns EINVAL if param is NULL or pid is negative, or if the period,
 *                execution cost, class, or budget policy is invalid
 *         EFAULT if copying param from user space fails
 *         ESRCH  if pid does not correspond to a valid task
 *         EBUSY  if the task is already a real-time task
 *         0      on success
 *
 * Only non-real-time tasks may be configured with this system call
 * to avoid races with the scheduler. In practice, this means that a
 * task's parameters must be set _before_ calling sys_prepare_rt_task()
 *
 * find_task_by_vpid() assumes that we are in the same namespace as the
 * target.
 */
asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param)
{
	struct rt_task tp;
	struct task_struct *target;
	int retval = -EINVAL;

	printk("Setting up rt task parameters for process %d.\n", pid);

	if (pid < 0 || param == 0) {
		goto out;
	}
	if (copy_from_user(&tp, param, sizeof(tp))) {
		retval = -EFAULT;
		goto out;
	}

	/* Task search and manipulation must be protected */
	read_lock_irq(&tasklist_lock);
	rcu_read_lock();
	if (!(target = find_task_by_vpid(pid))) {
		retval = -ESRCH;
		rcu_read_unlock();
		goto out_unlock;
	}
	rcu_read_unlock();

	if (is_realtime(target)) {
		/* The task is already a real-time task.
		 * We cannot allow parameter changes at this point.
		 */
		retval = -EBUSY;
		goto out_unlock;
	}

	/* set relative deadline to be implicit if left unspecified */
	if (tp.relative_deadline == 0)
		tp.relative_deadline = tp.period;

	if (tp.exec_cost <= 0)
		goto out_unlock;
	if (tp.period <= 0)
		goto out_unlock;
	if (min(tp.relative_deadline, tp.period) < tp.exec_cost) /* density check */
	{
		printk(KERN_INFO "litmus: real-time task %d rejected "
		       "because task density > 1.0\n", pid);
		goto out_unlock;
	}
	if (tp.cls != RT_CLASS_HARD &&
	    tp.cls != RT_CLASS_SOFT &&
	    tp.cls != RT_CLASS_BEST_EFFORT)
	{
		printk(KERN_INFO "litmus: real-time task %d rejected "
		       "because its class is invalid\n", pid);
		goto out_unlock;
	}
	if (tp.budget_policy != NO_ENFORCEMENT &&
	    tp.budget_policy != QUANTUM_ENFORCEMENT &&
	    tp.budget_policy != PRECISE_ENFORCEMENT)
	{
		printk(KERN_INFO "litmus: real-time task %d rejected "
		       "because unsupported budget enforcement policy "
		       "specified (%d)\n",
		       pid, tp.budget_policy);
		goto out_unlock;
	}

	target->rt_param.task_params = tp;

	retval = 0;
out_unlock:
	read_unlock_irq(&tasklist_lock);
out:
	return retval;
}

/*
 * Getter of task's RT params
 *   returns EINVAL if param is NULL or pid is negative
 *   returns ESRCH  if pid does not correspond to a valid task
 *   returns EFAULT if copying of parameters has failed.
 *
 * find_task_by_vpid() assumes that we are in the same namespace as the
 * target.
 */
asmlinkage long sys_get_rt_task_param(pid_t pid, struct rt_task __user * param)
{
	int retval = -EINVAL;
	struct task_struct *source;
	struct rt_task lp;
	if (param == 0 || pid < 0)
		goto out;
	read_lock(&tasklist_lock);
	if (!(source = find_task_by_vpid(pid))) {
		retval = -ESRCH;
		goto out_unlock;
	}
	lp = source->rt_param.task_params;
	read_unlock(&tasklist_lock);
	/* Do copying outside the lock */
	retval = copy_to_user(param, &lp, sizeof(lp)) ? -EFAULT : 0;
	return retval;
out_unlock:
	read_unlock(&tasklist_lock);
out:
	return retval;
}

/*
 * This is the crucial function for periodic task implementation.
 * It checks if a task is periodic, checks if such kind of sleep
 * is permitted and calls plugin-specific sleep, which puts the
 * task into a wait array.
 *    returns 0 on successful wakeup
 *    returns EPERM if current conditions do not permit such sleep
 *    returns EINVAL if current task is not able to go to sleep
 */
asmlinkage long sys_complete_job(void)
{
	int retval = -EPERM;
	if (!is_realtime(current)) {
		retval = -EINVAL;
		goto out;
	}
	/* Task with negative or zero period cannot sleep */
	if (get_rt_period(current) <= 0) {
		retval = -EINVAL;
		goto out;
	}
	/* The plugin has to put the task into an
	 * appropriate queue and call schedule
	 */
	retval = litmus->complete_job();
out:
	return retval;
}

/* This is an "improved" version of sys_complete_job that
 * addresses the problem of unintentionally missing a job after
 * an overrun.
 *
 *    returns 0 on successful wakeup
 *    returns EPERM if current conditions do not permit such sleep
 *    returns EINVAL if current task is not able to go to sleep
 */
asmlinkage long sys_wait_for_job_release(unsigned int job)
{
	int retval = -EPERM;
	if (!is_realtime(current)) {
		retval = -EINVAL;
		goto out;
	}

	/* Task with negative or zero period cannot sleep */
	if (get_rt_period(current) <= 0) {
		retval = -EINVAL;
		goto out;
	}

	retval = 0;

	/* first wait until we have "reached" the desired job
	 *
	 * This implementation has at least two problems:
	 *
	 * 1) It doesn't gracefully handle the wrap around of
	 *    job_no. Since LITMUS is a prototype, this is not much
	 *    of a problem right now.
	 *
	 * 2) It is theoretically racy if a job release occurs
	 *    between checking job_no and calling sleep_next_period().
	 *    A proper solution would require adding another callback
	 *    in the plugin structure and testing the condition with
	 *    interrupts disabled.
	 *
	 * FIXME: At least problem 2 should be taken care of eventually.
	 */
	while (!retval && job > current->rt_param.job_params.job_no)
		/* If the last job overran then job <= job_no and we
		 * don't send the task to sleep.
		 */
		retval = litmus->complete_job();
out:
	return retval;
}

/* This is a helper syscall to query the current job sequence number.
 *
 *    returns 0 on successful query
 *    returns EPERM if task is not a real-time task.
 *    returns EFAULT if &job is not a valid pointer.
 */
asmlinkage long sys_query_job_no(unsigned int __user *job)
{
	int retval = -EPERM;
	if (is_realtime(current))
		retval = put_user(current->rt_param.job_params.job_no, job);

	return retval;
}

/* sys_null_call() is only used for determining raw system call
 * overheads (kernel entry, kernel exit). It has no useful side effects.
 * If ts is non-NULL, then the current Feather-Trace time is recorded.
 */
asmlinkage long sys_null_call(cycles_t __user *ts)
{
	long ret = 0;
	cycles_t now;

	if (ts) {
		now = get_cycles();
		ret = put_user(now, ts);
	}

	return ret;
}

/* p is a real-time task. Re-init its state as a best-effort task. */
static void reinit_litmus_state(struct task_struct* p, int restore)
{
	struct rt_task user_config = {};
	void* ctrl_page = NULL;

	if (restore) {
		/* Save user-space provided configuration data
		 * and allocated page. */
		user_config = p->rt_param.task_params;
		ctrl_page = p->rt_param.ctrl_page;
	}

	/* We probably should not be inheriting any task's priority
	 * at this point in time.
	 */
	WARN_ON(p->rt_param.inh_task);

	/* Cleanup everything else. */
	memset(&p->rt_param, 0, sizeof(p->rt_param));

	/* Restore preserved fields. */
	if (restore) {
		p->rt_param.task_params = user_config;
		p->rt_param.ctrl_page = ctrl_page;
	}
}

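/* Transition tsk into a LITMUS^RT real-time task: validate its parameters,
 * allocate the per-task heap nodes, and ask the active plugin whether it
 * accepts the task. On failure, the allocations are released again.
 */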
long litmus_admit_task(struct task_struct* tsk)
{
	long retval = 0;

	BUG_ON(is_realtime(tsk));

	tsk_rt(tsk)->heap_node = NULL;
	tsk_rt(tsk)->rel_heap = NULL;

	if (get_rt_relative_deadline(tsk) == 0 ||
	    get_exec_cost(tsk) >
	    min(get_rt_relative_deadline(tsk), get_rt_period(tsk))) {
		TRACE_TASK(tsk,
			   "litmus admit: invalid task parameters "
			   "(e = %lu, p = %lu, d = %lu)\n",
			   get_exec_cost(tsk), get_rt_period(tsk),
			   get_rt_relative_deadline(tsk));
		retval = -EINVAL;
		goto out;
	}

	INIT_LIST_HEAD(&tsk_rt(tsk)->list);

	/* allocate heap node for this task */
	tsk_rt(tsk)->heap_node = bheap_node_alloc(GFP_ATOMIC);
	tsk_rt(tsk)->rel_heap = release_heap_alloc(GFP_ATOMIC);

	if (!tsk_rt(tsk)->heap_node || !tsk_rt(tsk)->rel_heap) {
		printk(KERN_WARNING "litmus: no more heap node memory!?\n");

		retval = -ENOMEM;
		goto out;
	} else {
		bheap_node_init(&tsk_rt(tsk)->heap_node, tsk);
	}

	preempt_disable();

	retval = litmus->admit_task(tsk);

	if (!retval) {
		sched_trace_task_name(tsk);
		sched_trace_task_param(tsk);
		atomic_inc(&rt_task_count);
	}

	preempt_enable();

out:
	if (retval) {
		if (tsk_rt(tsk)->heap_node)
			bheap_node_free(tsk_rt(tsk)->heap_node);
		if (tsk_rt(tsk)->rel_heap)
			release_heap_free(tsk_rt(tsk)->rel_heap);
	}
	return retval;
}

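/* Undo litmus_admit_task(): release the per-task heap nodes, decrement the
 * RT task count, and reset the task's LITMUS^RT state.
 */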
void litmus_clear_state(struct task_struct* tsk)
{
	BUG_ON(bheap_node_in_heap(tsk_rt(tsk)->heap_node));
	bheap_node_free(tsk_rt(tsk)->heap_node);
	release_heap_free(tsk_rt(tsk)->rel_heap);

	atomic_dec(&rt_task_count);
	reinit_litmus_state(tsk, 1);
}

/* called from sched_setscheduler() */
void litmus_exit_task(struct task_struct* tsk)
{
	if (is_realtime(tsk)) {
		sched_trace_task_completion(tsk, 1);

		litmus->task_exit(tsk);
	}
}

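/* Plugin switches are serialized by an rwsem: code paths that must not race
 * with a plugin switch hold it for reading via the disable/enable helpers
 * below, while switch_sched_plugin() takes it for writing.
 */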
static DECLARE_RWSEM(plugin_switch_mutex);

void litmus_plugin_switch_disable(void)
{
	down_read(&plugin_switch_mutex);
}

void litmus_plugin_switch_enable(void)
{
	up_read(&plugin_switch_mutex);
}

static int __do_plugin_switch(struct sched_plugin* plugin)
{
	int ret;

	/* don't switch if there are active real-time tasks */
	if (atomic_read(&rt_task_count) == 0) {
		TRACE("deactivating plugin %s\n", litmus->plugin_name);
		ret = litmus->deactivate_plugin();
		if (0 != ret)
			goto out;

		TRACE("activating plugin %s\n", plugin->plugin_name);
		ret = plugin->activate_plugin();
		if (0 != ret) {
			printk(KERN_INFO "Can't activate %s (%d).\n",
			       plugin->plugin_name, ret);
			plugin = &linux_sched_plugin;
		}

		printk(KERN_INFO "Switching to LITMUS^RT plugin %s.\n", plugin->plugin_name);
		litmus = plugin;
	} else
		ret = -EBUSY;
out:
	TRACE("do_plugin_switch() => %d\n", ret);
	return ret;
}

static atomic_t ready_to_switch;

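/* Runs on every online CPU via stop_cpus(). Each CPU disables interrupts and
 * decrements ready_to_switch; the last CPU to arrive performs the actual
 * switch in __do_plugin_switch() and then releases the others by setting the
 * counter to INT_MAX, on which all CPUs spin before re-enabling interrupts.
 */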
static int do_plugin_switch(void *_plugin)
{
	unsigned long flags;
	int ret = 0;

	local_save_flags(flags);
	local_irq_disable();
	hard_irq_disable();

	if (atomic_dec_and_test(&ready_to_switch))
	{
		ret = __do_plugin_switch((struct sched_plugin*) _plugin);
		atomic_set(&ready_to_switch, INT_MAX);
	}

	do {
		cpu_relax();
	} while (atomic_read(&ready_to_switch) != INT_MAX);

	local_irq_restore(flags);
	return ret;
}

/* Switching a plugin in use is tricky.
 * We must watch out that no real-time tasks exist
 * (and that none are created in parallel) and that the plugin is not
 * currently in use on any processor (in theory).
 */
int switch_sched_plugin(struct sched_plugin* plugin)
{
	int err;
	struct domain_proc_info* domain_info;

	BUG_ON(!plugin);

	if (atomic_read(&rt_task_count) == 0) {
		down_write(&plugin_switch_mutex);

		deactivate_domain_proc();

		get_online_cpus();
		atomic_set(&ready_to_switch, num_online_cpus());
		err = stop_cpus(cpu_online_mask, do_plugin_switch, plugin);
		put_online_cpus();

		if (!litmus->get_domain_proc_info(&domain_info))
			activate_domain_proc(domain_info);

		up_write(&plugin_switch_mutex);
		return err;
	} else
		return -EBUSY;
}

/* Called upon fork.
 * p is the newly forked task.
 */
void litmus_fork(struct task_struct* p)
{
	if (is_realtime(p)) {
		/* clean out any litmus related state, don't preserve anything */
		reinit_litmus_state(p, 0);
		/* Don't let the child be a real-time task. */
		p->sched_reset_on_fork = 1;
	} else
		/* non-rt tasks might have ctrl_page set */
		tsk_rt(p)->ctrl_page = NULL;

	/* od tables are never inherited across a fork */
	p->od_table = NULL;
}

/* Called upon execve().
 * current is doing the exec.
 * Don't let address space specific stuff leak.
 */
void litmus_exec(void)
{
	struct task_struct* p = current;

	if (is_realtime(p)) {
		WARN_ON(p->rt_param.inh_task);
		if (tsk_rt(p)->ctrl_page) {
			free_page((unsigned long) tsk_rt(p)->ctrl_page);
			tsk_rt(p)->ctrl_page = NULL;
		}
	}
}

/* Called when dead_tsk is being deallocated
 */
void exit_litmus(struct task_struct *dead_tsk)
{
	/* We also allow non-RT tasks to
	 * allocate control pages to allow
	 * measurements with non-RT tasks.
	 * So check if we need to free the page
	 * in any case.
	 */
	if (tsk_rt(dead_tsk)->ctrl_page) {
		TRACE_TASK(dead_tsk,
			   "freeing ctrl_page %p\n",
			   tsk_rt(dead_tsk)->ctrl_page);
		free_page((unsigned long) tsk_rt(dead_tsk)->ctrl_page);
	}

	/* Tasks should not be real-time tasks any longer at this point. */
	BUG_ON(is_realtime(dead_tsk));
}

void litmus_do_exit(struct task_struct *exiting_tsk)
{
	/* This task called do_exit(), but is still a real-time task. To avoid
	 * complications later, we force it to be a non-real-time task now. */

	struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };

	TRACE_TASK(exiting_tsk, "exiting, demoted to SCHED_FIFO\n");
	sched_setscheduler_nocheck(exiting_tsk, SCHED_FIFO, &param);
}

void litmus_dealloc(struct task_struct *tsk)
{
	/* tsk is no longer a real-time task */
	TRACE_TASK(tsk, "Deallocating real-time task data\n");
	litmus->task_cleanup(tsk);
	litmus_clear_state(tsk);
}

/* move current non-RT task to a specific CPU */
int litmus_be_migrate_to(int cpu)
{
	struct cpumask single_cpu_aff;

	cpumask_clear(&single_cpu_aff);
	cpumask_set_cpu(cpu, &single_cpu_aff);
	return sched_setaffinity(current->pid, &single_cpu_aff);
}

#ifdef CONFIG_MAGIC_SYSRQ
int sys_kill(int pid, int sig);

static void sysrq_handle_kill_rt_tasks(int key)
{
	struct task_struct *t;
	read_lock(&tasklist_lock);
	for_each_process(t) {
		if (is_realtime(t)) {
			sys_kill(t->pid, SIGKILL);
		}
	}
	read_unlock(&tasklist_lock);
}

static struct sysrq_key_op sysrq_kill_rt_tasks_op = {
	.handler    = sysrq_handle_kill_rt_tasks,
	.help_msg   = "quit-rt-tasks(X)",
	.action_msg = "sent SIGKILL to all LITMUS^RT real-time tasks",
};
#endif

extern struct sched_plugin linux_sched_plugin;

static int litmus_shutdown_nb(struct notifier_block *unused1,
			      unsigned long unused2, void *unused3)
{
	/* Attempt to switch back to regular Linux scheduling.
	 * Forces the active plugin to clean up.
	 */
	if (litmus != &linux_sched_plugin) {
		int ret = switch_sched_plugin(&linux_sched_plugin);
		if (ret) {
			printk("Auto-shutdown of active Litmus plugin failed.\n");
		}
	}
	return NOTIFY_DONE;
}

static struct notifier_block shutdown_notifier = {
	.notifier_call = litmus_shutdown_nb,
};

static int __init _init_litmus(void)
{
	/* Common initializers,
	 * mode change lock is used to enforce single mode change
	 * operation.
	 */
	printk("Starting LITMUS^RT kernel\n");

	register_sched_plugin(&linux_sched_plugin);

	bheap_node_cache = KMEM_CACHE(bheap_node, SLAB_PANIC);
	release_heap_cache = KMEM_CACHE(release_heap, SLAB_PANIC);

#ifdef CONFIG_MAGIC_SYSRQ
	/* offer some debugging help */
	if (!register_sysrq_key('x', &sysrq_kill_rt_tasks_op))
		printk("Registered kill rt tasks magic sysrq.\n");
	else
		printk("Could not register kill rt tasks magic sysrq.\n");
#endif

	init_litmus_proc();

	register_reboot_notifier(&shutdown_notifier);

	return 0;
}

static void _exit_litmus(void)
{
	unregister_reboot_notifier(&shutdown_notifier);

	exit_litmus_proc();
	kmem_cache_destroy(bheap_node_cache);
	kmem_cache_destroy(release_heap_cache);
}

module_init(_init_litmus);
module_exit(_exit_litmus);