author    Christopher Kenna <cjk@cs.unc.edu>  2011-09-05 22:57:09 -0400
committer Christopher Kenna <cjk@cs.unc.edu>  2011-09-05 22:57:09 -0400
commit    1d516ebe47adcc6998f6bb8dbee7942e20f6eaf9 (patch)
tree      4b93ef3d9bfe31295435823f5565778aa83fa0c8
parent    d5e965b0074d6ef10f5a77112fc3671613a2150f (diff)
Partial cyclic executive plugin.
This is not tested and is missing everything for /proc. I am just checking it in so that the code is backed up in a git repository.
-rw-r--r--  include/litmus/sched_mc.h  |   1
-rw-r--r--  litmus/Kconfig             |   8
-rw-r--r--  litmus/Makefile            |   2
-rw-r--r--  litmus/litmus.c            |  16
-rw-r--r--  litmus/sched_mc_ce.c       | 495
5 files changed, 517 insertions(+), 5 deletions(-)
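Before the diff: the litmus.c hunk below extends sys_set_rt_task_mc_param() so that a level A task must also supply a valid lvl_a_id in [0, CONFIG_PLUGIN_MC_LEVEL_A_MAX_TASKS). A small standalone sketch of that acceptance rule (plain C; the full enum member list and the 32-slot limit are assumptions based on this patch's Kconfig default, not a userspace API):

```c
/*
 * Illustrative only: mirrors the validity checks added to
 * sys_set_rt_task_mc_param() in the litmus.c hunk below. The enum members
 * beyond CRIT_LEVEL_A/D and the 32-slot limit are assumptions taken from this
 * patch's Kconfig default.
 */
#include <stdio.h>

enum crit_level { CRIT_LEVEL_A, CRIT_LEVEL_B, CRIT_LEVEL_C, CRIT_LEVEL_D };

#define PLUGIN_MC_LEVEL_A_MAX_TASKS 32	/* Kconfig default in this patch */

struct mc_task {
	enum crit_level crit;
	int lvl_a_id;	/* new field: slot in the per-CPU CE table */
};

/* Returns 1 if the parameters would pass the new checks, 0 otherwise. */
static int mc_params_valid(const struct mc_task *mc)
{
	if (mc->crit < CRIT_LEVEL_A || mc->crit > CRIT_LEVEL_D)
		return 0;
	if (mc->crit == CRIT_LEVEL_A &&
	    (mc->lvl_a_id < 0 || mc->lvl_a_id >= PLUGIN_MC_LEVEL_A_MAX_TASKS))
		return 0;
	return 1;
}

int main(void)
{
	struct mc_task ok  = { .crit = CRIT_LEVEL_A, .lvl_a_id = 3 };
	struct mc_task bad = { .crit = CRIT_LEVEL_A, .lvl_a_id = 99 };

	printf("ok: %d, bad: %d\n", mc_params_valid(&ok), mc_params_valid(&bad));
	return 0;
}
```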
diff --git a/include/litmus/sched_mc.h b/include/litmus/sched_mc.h
index 66910773f430..266f89172f19 100644
--- a/include/litmus/sched_mc.h
+++ b/include/litmus/sched_mc.h
@@ -15,6 +15,7 @@ enum crit_level {
 
 struct mc_task {
 	enum crit_level crit;
+	int lvl_a_id;
 };
 
 struct mc_job {
diff --git a/litmus/Kconfig b/litmus/Kconfig
index 9a1cc2436580..b8f6a9159eb2 100644
--- a/litmus/Kconfig
+++ b/litmus/Kconfig
@@ -32,6 +32,14 @@ config PLUGIN_MC
 
 	  If unsure, say Yes.
 
+config PLUGIN_MC_LEVEL_A_MAX_TASKS
+	int "Maximum level A tasks"
+	depends on PLUGIN_MC
+	range 1 128
+	default 32
+	help
+	  The maximum number of level A tasks allowed (per-cpu) in level A.
+
 config RELEASE_MASTER
 	bool "Release-master Support"
 	depends on ARCH_HAS_SEND_PULL_TIMERS
diff --git a/litmus/Makefile b/litmus/Makefile
index 7d7003592138..03dc31a12711 100644
--- a/litmus/Makefile
+++ b/litmus/Makefile
@@ -22,7 +22,7 @@ obj-y = sched_plugin.o litmus.o \
 
 obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o
 obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o
-obj-$(CONFIG_PLUGIN_MC) += sched_mc.o
+obj-$(CONFIG_PLUGIN_MC) += sched_mc.o sched_mc_ce.o
 
 obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o
 obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o
diff --git a/litmus/litmus.c b/litmus/litmus.c
index 16b3aeda5615..7db9fdadc7db 100644
--- a/litmus/litmus.c
+++ b/litmus/litmus.c
@@ -314,17 +314,25 @@ asmlinkage long sys_set_rt_task_mc_param(pid_t pid, struct mc_task __user *param
 		goto out_unlock;
 	}
 
+	/* check parameters passed in are valid */
 	if (mc.crit < CRIT_LEVEL_A || mc.crit > CRIT_LEVEL_D)
 	{
 		printk(KERN_WARNING "litmus: real-time task %d rejected because "
 				"of invalid criticality level\n", pid);
 		goto out_unlock;
 	}
+	if (CRIT_LEVEL_A == mc.crit &&
+			(mc.lvl_a_id < 0 ||
+			 mc.lvl_a_id >= CONFIG_PLUGIN_MC_LEVEL_A_MAX_TASKS))
+	{
+		printk(KERN_WARNING "litmus: real-time task %d rejected because "
+				"of invalid level A id\n", pid);
+		goto out_unlock;
+	}
 
 	mc_data = tsk_rt(target)->mc_data;
-	if (!mc_data)
-	{
-		mc_data = kmalloc(sizeof(*mc_data), GFP_ATOMIC);
+	if (!mc_data) {
+		mc_data = kzalloc(sizeof(*mc_data), GFP_ATOMIC);
 		if (!mc_data)
 		{
 			retval = -ENOMEM;
@@ -332,8 +340,8 @@ asmlinkage long sys_set_rt_task_mc_param(pid_t pid, struct mc_task __user *param
 		}
 		tsk_rt(target)->mc_data = mc_data;
 	}
-	mc_data->mc_task.crit = mc.crit;
 
+	mc_data->mc_task = mc;
 	retval = 0;
 out_unlock:
 	read_unlock_irq(&tasklist_lock);
diff --git a/litmus/sched_mc_ce.c b/litmus/sched_mc_ce.c
new file mode 100644
index 000000000000..21d1d8789c2b
--- /dev/null
+++ b/litmus/sched_mc_ce.c
@@ -0,0 +1,495 @@
+/**
+ * litmus/sched_mc_ce.c
+ *
+ * The Cyclic Executive (CE) scheduler used by the mixed criticality scheduling
+ * algorithm.
+ */
+
+#include <asm/atomic.h>
+
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/hrtimer.h>
+#include <linux/pid.h>
+#include <linux/sched.h>
+
+#include <litmus/litmus.h>
+#include <litmus/sched_plugin.h>
+#include <litmus/rt_domain.h>
+#include <litmus/rt_param.h>
+#include <litmus/sched_mc.h>
+
+static struct sched_plugin mc_ce_plugin __cacheline_aligned_in_smp;
+
+#define tsk_mc_data(t) (tsk_rt(t)->mc_data)
+#define tsk_mc_crit(t) (tsk_mc_data(t)->mc_task.crit)
+#define is_active_plugin() (litmus == &mc_ce_plugin)
+
+static atomic_t start_time_set = ATOMIC_INIT(0);
+static atomic64_t start_time = ATOMIC64_INIT(0);
+
+/*
+ * Cache the budget along with the struct PID for a task so that we don't need
+ * to fetch its task_struct every time we check to see what should be
+ * scheduled.
+ */
+struct ce_dom_pid_entry {
+	struct pid *pid;
+	lt_t exec_cost;
+	lt_t acc_time;
+};
+
+struct ce_dom_data {
+	int cpu;
+	struct task_struct *scheduled, *should_schedule;
+	/*
+	 * Each CPU needs a mapping of level A ID (integer) to struct pid so
+	 * that we can get its task struct.
+	 */
+	struct ce_dom_pid_entry pid_entries[CONFIG_PLUGIN_MC_LEVEL_A_MAX_TASKS];
+	int num_pid_entries;
+	lt_t cycle_time;
+	struct hrtimer_start_on_info timer_info;
+	struct hrtimer timer;
+};
+
+DEFINE_PER_CPU(domain_t, mc_ce_doms);
+DEFINE_PER_CPU(struct ce_dom_data, _mc_ce_dom_data);
+
+/*
+ * Return the index into the PID entries table of what to schedule next.
+ * Don't call if the table is empty. Assumes the caller has the domain lock.
+ *
+ * TODO Currently O(n) in the number of tasks on the CPU. Binary search?
+ */
+static int mc_ce_schedule_at(const lt_t when, const domain_t *dom)
+{
+	const struct ce_dom_data *ce_data = dom->data;
+	const struct ce_dom_pid_entry *pid_entry = NULL;
+	lt_t offset;
+	int i;
+
+	BUG_ON(ce_data->cycle_time < 1);
+	BUG_ON(ce_data->num_pid_entries < 1);
+
+	offset = (when - atomic64_read(&start_time)) % ce_data->cycle_time;
+	for (i = 0; i < ce_data->num_pid_entries; ++i) {
+		pid_entry = &ce_data->pid_entries[i];
+		if (offset < pid_entry->acc_time) {
+			/* found task to schedule in this window */
+			break;
+		}
+	}
+	/* can only happen if cycle_time is not right */
+	BUG_ON(pid_entry->acc_time > ce_data->cycle_time);
+	return i;
+}
+
+static struct task_struct *mc_ce_schedule(struct task_struct *prev)
+{
+	domain_t *dom = &per_cpu(mc_ce_doms, smp_processor_id());
+	struct ce_dom_data *ce_data = dom->data;
+	struct task_struct *next = NULL;
+	/* for states */
+	int exists, np, preempt;
+
+	raw_spin_lock(dom->lock);
+
+	/* sanity checking */
+	BUG_ON(ce_data->scheduled && ce_data->scheduled != prev);
+	BUG_ON(ce_data->scheduled && !is_realtime(prev));
+	BUG_ON(is_realtime(prev) && !ce_data->scheduled);
+
+	/* figure out state */
+	exists = ce_data->scheduled != NULL;
+	np = exists && is_np(ce_data->scheduled);
+	preempt = ce_data->scheduled != ce_data->should_schedule;
+
+	if (np) {
+		/* scheduled real time task needs to continue */
+		request_exit_np(ce_data->scheduled);
+		next = prev;
+	} else if (ce_data->should_schedule &&
+			is_running(ce_data->should_schedule)) {
+		/* schedule the task for this period if it's not blocked */
+		next = ce_data->should_schedule;
+	}
+
+	sched_state_task_picked();
+	raw_spin_unlock(dom->lock);
+	return next;
+}
+
+static void mc_ce_finish_switch(struct task_struct *prev)
+{
+	domain_t *dom = &per_cpu(mc_ce_doms, smp_processor_id());
+	struct ce_dom_data *ce_data = dom->data;
+
+	if (is_realtime(current) && CRIT_LEVEL_A == tsk_mc_crit(current))
+		ce_data->scheduled = current;
+	else
+		ce_data->scheduled = NULL;
+}
+
+/*
+ * Called for every local timer interrupt.
+ * Linux calls this with interrupts disabled, AFAIK.
+ */
+static void mc_ce_tick(struct task_struct *ts)
+{
+	domain_t *dom = &per_cpu(mc_ce_doms, smp_processor_id());
+	struct ce_dom_data *ce_data = dom->data;
+	struct task_struct *should_schedule;
+
+	if (is_realtime(ts) && CRIT_LEVEL_A == tsk_mc_crit(ts)) {
+		raw_spin_lock(dom->lock);
+		should_schedule = ce_data->should_schedule;
+		raw_spin_unlock(dom->lock);
+
+		if (!is_np(ts) && ts != should_schedule) {
+			litmus_reschedule_local();
+		} else if (is_user_np(ts)) {
+			request_exit_np(ts);
+		}
+	}
+}
+
+/*
+ * Admit task called to see if this task is permitted to enter the system.
+ * Here we look up the task's PID structure and save it in the proper slot on
+ * the CPU this task will run on.
+ */
+static long mc_ce_admit_task(struct task_struct *ts)
+{
+	domain_t *dom = &per_cpu(mc_ce_doms, get_partition(ts));
+	struct ce_dom_data *ce_data = dom->data;
+	struct mc_data *mcd = tsk_mc_data(ts);
+	struct pid *pid = NULL;
+	long retval = -EINVAL;
+	unsigned long flags;
+	const int lvl_a_id = mcd->mc_task.lvl_a_id;
+
+	/* check the task has migrated to the right CPU (like in sched_cedf) */
+	if (task_cpu(ts) != get_partition(ts)) {
+		printk(KERN_INFO "litmus: %d admitted on CPU %d but want %d ",
+				ts->pid, task_cpu(ts), get_partition(ts));
+		goto out;
+	}
+
+	/* only level A tasks can be CE */
+	if (!mcd || CRIT_LEVEL_A != tsk_mc_crit(ts)) {
+		printk(KERN_INFO "litmus: non-MC or non level A task %d\n",
+				ts->pid);
+		goto out;
+	}
+
+	/* try and get the task's PID structure */
+	pid = get_task_pid(ts, PIDTYPE_PID);
+	if (IS_ERR_OR_NULL(pid)) {
+		printk(KERN_INFO "litmus: couldn't get pid struct for %d\n",
+				ts->pid);
+		goto out;
+	}
+
+	raw_spin_lock_irqsave(dom->lock, flags);
+	if (lvl_a_id >= ce_data->num_pid_entries) {
+		printk(KERN_INFO "litmus: level A id greater than expected "
+				"number of tasks %d for %d cpu %d\n",
+				ce_data->num_pid_entries, ts->pid,
+				get_partition(ts));
+		goto out_put_pid_unlock;
+	}
+	if (ce_data->pid_entries[lvl_a_id].pid) {
+		printk(KERN_INFO "litmus: have saved pid info id: %d cpu: %d\n",
+				lvl_a_id, get_partition(ts));
+		goto out_put_pid_unlock;
+	}
+	if (get_exec_cost(ts) != ce_data->pid_entries[lvl_a_id].exec_cost) {
+		printk(KERN_INFO "litmus: saved exec cost %llu and task exec "
+				"cost %llu differ\n",
+				ce_data->pid_entries[lvl_a_id].exec_cost,
+				get_exec_cost(ts));
+		goto out_put_pid_unlock;
+	}
+	ce_data->pid_entries[lvl_a_id].pid = pid;
+	retval = 0;
+	/* don't call put_pid if we are successful */
+	goto out_unlock;
+
+out_put_pid_unlock:
+	put_pid(pid);
+out_unlock:
+	raw_spin_unlock_irqrestore(dom->lock, flags);
+out:
+	return retval;
+}
+
+/*
+ * Called to set up a new real-time task (after the admit_task callback).
+ * At this point the task's struct PID is already hooked up on the destination
+ * CPU. The task may already be running.
+ */
+static void mc_ce_task_new(struct task_struct *ts, int on_rq, int running)
+{
+	domain_t *dom = &per_cpu(mc_ce_doms, task_cpu(ts));
+	struct ce_dom_data *ce_data = dom->data;
+	struct pid *pid_should_be_running;
+	unsigned long flags;
+	int idx, should_be_running;
+
+	raw_spin_lock_irqsave(dom->lock, flags);
+	idx = mc_ce_schedule_at(litmus_clock(), dom);
+	pid_should_be_running = ce_data->pid_entries[idx].pid;
+	rcu_read_lock();
+	should_be_running = (ts == pid_task(pid_should_be_running, PIDTYPE_PID));
+	rcu_read_unlock();
+	if (running) {
+		/* admit task checks that the task is not on the wrong CPU */
+		BUG_ON(task_cpu(ts) != get_partition(ts));
+		BUG_ON(ce_data->scheduled);
+		ce_data->scheduled = ts;
+
+		if (!should_be_running)
+			preempt_if_preemptable(ce_data->scheduled, ce_data->cpu);
+	} else if (!running && should_be_running) {
+		ce_data->should_schedule = ts;
+		preempt_if_preemptable(ce_data->scheduled, ce_data->cpu);
+	}
+	raw_spin_unlock_irqrestore(dom->lock, flags);
+}
+
+/*
+ * Called to re-introduce a task after blocking.
+ * Can potentially be called multiple times.
+ */
+static void mc_ce_task_wake_up(struct task_struct *ts)
+{
+	domain_t *dom = &per_cpu(mc_ce_doms, smp_processor_id());
+	struct ce_dom_data *ce_data = dom->data;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(dom->lock, flags);
+	if (ts == ce_data->should_schedule && ts != ce_data->scheduled)
+		preempt_if_preemptable(ts, ce_data->cpu);
+	raw_spin_unlock_irqrestore(dom->lock, flags);
+}
+
+/*
+ * Called to notify the plugin of a blocking real-time task. Only called for
+ * real-time tasks and before schedule is called.
+ */
+static void mc_ce_task_block(struct task_struct *ts)
+{
+	/* nothing to do because it will be taken care of in schedule */
+}
+
+/*
+ * The complete_job function is called when the complete_job syscall
+ * is called from user land.
+ */
+static long mc_ce_complete_job(void)
+{
+	/* TODO */
+	printk(KERN_EMERG "complete job called TODO\n");
+	BUG();
+	return 0;
+}
+
+/*
+ * Called when a task switches from RT mode back to normal mode.
+ */
+static void mc_ce_task_exit(struct task_struct *ts)
+{
+	domain_t *dom = &per_cpu(mc_ce_doms, get_partition(ts));
+	struct ce_dom_data *ce_data = dom->data;
+	unsigned long flags;
+	struct pid *pid;
+	const int lvl_a_id = tsk_mc_data(ts)->mc_task.lvl_a_id;
+
+	BUG_ON(task_cpu(ts) != get_partition(ts));
+	BUG_ON(CRIT_LEVEL_A != tsk_mc_crit(ts));
+	BUG_ON(lvl_a_id >= ce_data->num_pid_entries);
+
+	raw_spin_lock_irqsave(dom->lock, flags);
+	pid = ce_data->pid_entries[lvl_a_id].pid;
+	BUG_ON(!pid);
+	put_pid(pid);
+	ce_data->pid_entries[lvl_a_id].pid = NULL;
+	raw_spin_unlock_irqrestore(dom->lock, flags);
+}
+
+/***********************************************************
+ * Timer stuff
+ **********************************************************/
+
+/*
+ * What to do when a timer fires. The timer should only be armed if the number
+ * of PID entries is positive.
+ */
+static enum hrtimer_restart timer_callback(struct hrtimer *timer)
+{
+	struct ce_dom_data *ce_data = container_of(timer,
+			struct ce_dom_data, timer);
+	domain_t *dom = &per_cpu(mc_ce_doms, ce_data->cpu);
+	/* relative and absolute times for cycles */
+	lt_t now, offset_rel, cycle_start_abs, next_timer_abs;
+	struct ce_dom_pid_entry *pid_entry;
+	unsigned long flags;
+	int idx;
+
+	raw_spin_lock_irqsave(dom->lock, flags);
+
+	now = litmus_clock();
+	offset_rel = (now - atomic64_read(&start_time)) % ce_data->cycle_time;
+	cycle_start_abs = now - offset_rel;
+	idx = mc_ce_schedule_at(now, dom);
+	pid_entry = &ce_data->pid_entries[idx];
+	next_timer_abs = cycle_start_abs + pid_entry->acc_time;
+	hrtimer_set_expires(timer, ns_to_ktime(next_timer_abs));
+	/* get the task_struct (pid_task can accept a NULL) */
+	rcu_read_lock();
+	ce_data->should_schedule = pid_task(pid_entry->pid, PIDTYPE_PID);
+	rcu_read_unlock();
+	if (ce_data->scheduled != ce_data->should_schedule)
+		preempt_if_preemptable(ce_data->scheduled, ce_data->cpu);
+
+	raw_spin_unlock_irqrestore(dom->lock, flags);
+
+	return HRTIMER_RESTART;
+}
+
+/*
+ * Cancel timers on all CPUs. Returns 1 if any were active.
+ */
+static int cancel_all_timers(void)
+{
+	struct ce_dom_data *ce_data;
+	domain_t *dom;
+	int cpu, ret = 0;
+
+	for_each_online_cpu(cpu) {
+		dom = &per_cpu(mc_ce_doms, cpu);
+		ce_data = dom->data;
+		ret = ret || hrtimer_cancel(&ce_data->timer);
+	}
+	return ret;
+}
+
+/*
+ * Arm all timers so that they start at the new value of start time.
+ * Any CPU without CE PID entries won't have a timer armed.
+ * All timers should be canceled before calling this.
+ */
+static void arm_all_timers(void)
+{
+	struct ce_dom_data *ce_data;
+	domain_t *dom;
+	int cpu;
+	const lt_t start = atomic64_read(&start_time);
+
+	for_each_online_cpu(cpu) {
+		dom = &per_cpu(mc_ce_doms, cpu);
+		ce_data = dom->data;
+		if (0 == ce_data->num_pid_entries)
+			continue;
+		hrtimer_start_on(cpu, &ce_data->timer_info, &ce_data->timer,
+				ns_to_ktime(start), HRTIMER_MODE_ABS_PINNED);
+	}
+}
+
+/*
+ * There are no real releases in the CE, but the task releasing code will
+ * call this. We can re-set our notion of the CE period start to make
+ * the schedule line up.
+ */
+static void mc_ce_release_at(struct task_struct *ts, lt_t start)
+{
+	if (atomic_inc_and_test(&start_time_set)) {
+		/* in this case, we won the race */
+		atomic64_set(&start_time, start);
+		cancel_all_timers();
+		arm_all_timers();
+	} else
+		atomic_dec(&start_time_set);
+}
+
+static long mc_ce_activate_plugin(void)
+{
+	struct ce_dom_data *ce_data;
+	domain_t *dom;
+	int cpu;
+
+	for_each_online_cpu(cpu) {
+		dom = &per_cpu(mc_ce_doms, cpu);
+		ce_data = dom->data;
+		ce_data->scheduled = NULL;
+		ce_data->should_schedule = NULL;
+	}
+
+	atomic_set(&start_time_set, 0);
+	atomic64_set(&start_time, litmus_clock());
+	arm_all_timers();
+	return 0;
+}
+
+static long mc_ce_deactivate_plugin(void)
+{
+	domain_t *dom;
+	struct ce_dom_data *ce_data;
+	int cpu;
+
+	cancel_all_timers();
+	for_each_online_cpu(cpu) {
+		dom = &per_cpu(mc_ce_doms, cpu);
+		ce_data = dom->data;
+		atomic_set(&ce_data->timer_info.state,
+				HRTIMER_START_ON_INACTIVE);
+	}
+	return 0;
+}
+
+/* Plugin object */
+static struct sched_plugin mc_ce_plugin __cacheline_aligned_in_smp = {
+	.plugin_name		= "MC-CE",
+	.admit_task		= mc_ce_admit_task,
+	.task_new		= mc_ce_task_new,
+	.complete_job		= mc_ce_complete_job,
+	.release_at		= mc_ce_release_at,
+	.task_exit		= mc_ce_task_exit,
+	.schedule		= mc_ce_schedule,
+	.finish_switch		= mc_ce_finish_switch,
+	.tick			= mc_ce_tick,
+	.task_wake_up		= mc_ce_task_wake_up,
+	.task_block		= mc_ce_task_block,
+	.activate_plugin	= mc_ce_activate_plugin,
+	.deactivate_plugin	= mc_ce_deactivate_plugin,
+};
+
+static int __init init_sched_mc_ce(void)
+{
+	struct ce_dom_data *ce_data;
+	domain_t *dom;
+	int cpu, i;
+
+	for_each_online_cpu(cpu) {
+		dom = &per_cpu(mc_ce_doms, cpu);
+		pd_domain_init(dom, NULL, NULL, NULL, NULL);
+		dom->data = &per_cpu(_mc_ce_dom_data, cpu);
+		ce_data = dom->data;
+		hrtimer_init(&ce_data->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+		hrtimer_start_on_info_init(&ce_data->timer_info);
+		ce_data->cpu = cpu;
+		ce_data->num_pid_entries = 0;
+		ce_data->cycle_time = 0;
+		ce_data->timer.function = timer_callback;
+
+		for (i = 0; i < CONFIG_PLUGIN_MC_LEVEL_A_MAX_TASKS; ++i) {
+			ce_data->pid_entries[i].pid = NULL;
+			ce_data->pid_entries[i].exec_cost = 0;
+			ce_data->pid_entries[i].acc_time = 0;
+		}
+	}
+	return register_sched_plugin(&mc_ce_plugin);
+}
+
+module_init(init_sched_mc_ce);
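
A note on the lookup that mc_ce_schedule_at() and timer_callback() perform above: each table entry stores its execution budget (exec_cost) and the accumulated offset within the cycle at which its slot ends (acc_time), so finding the active slot is a linear scan for the first entry whose acc_time exceeds the current offset into the cycle, and the next timer expiry is the start of the current cycle plus that acc_time. A minimal standalone sketch of that arithmetic (plain userspace C with a made-up three-slot table; not part of the patch):

```c
/*
 * Standalone sketch of the cyclic-executive table lookup used by
 * mc_ce_schedule_at() and timer_callback(). The three-slot table and the
 * timestamps are invented for illustration.
 */
#include <stdio.h>

typedef unsigned long long lt_t;	/* stand-in for the kernel's lt_t */

struct pid_entry {
	lt_t exec_cost;	/* length of this slot */
	lt_t acc_time;	/* offset into the cycle at which this slot ends */
};

/* Return the index of the slot that contains the given offset into the cycle. */
static int schedule_at(lt_t offset, const struct pid_entry *e, int n)
{
	int i;

	for (i = 0; i < n; ++i)
		if (offset < e[i].acc_time)
			break;	/* found the slot containing this offset */
	return i;
}

int main(void)
{
	/* example table: slots of 2 ms, 3 ms, and 5 ms -> 10 ms cycle */
	const struct pid_entry table[] = {
		{ 2000000ULL,  2000000ULL },
		{ 3000000ULL,  5000000ULL },
		{ 5000000ULL, 10000000ULL },
	};
	const lt_t cycle_time = 10000000ULL;
	const lt_t start = 0, now = 123500000ULL;	/* arbitrary instants */

	lt_t offset = (now - start) % cycle_time;	/* 3.5 ms into the cycle */
	lt_t cycle_start = now - offset;
	int idx = schedule_at(offset, table, 3);

	/* prints "slot 1 runs now; next timer at 125000000 ns" */
	printf("slot %d runs now; next timer at %llu ns\n",
	       idx, cycle_start + table[idx].acc_time);
	return 0;
}
```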